From b14bb2e7821bdd133afeb5e623fd6c5a2273ecf6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Sep 2025 12:50:26 +0300 Subject: [PATCH 0001/1075] tee: qcom: prevent potential off by one read Re-order these checks to check if "i" is a valid array index before using it. This prevents a potential off by one read access. Fixes: d6e290837e50 ("tee: add Qualcomm TEE driver") Signed-off-by: Dan Carpenter Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/call.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/qcomtee/call.c b/drivers/tee/qcomtee/call.c index cc17a48d0ab7..ac134452cc9c 100644 --- a/drivers/tee/qcomtee/call.c +++ b/drivers/tee/qcomtee/call.c @@ -308,7 +308,7 @@ static int qcomtee_params_from_args(struct tee_param *params, } /* Release any IO and OO objects not processed. */ - for (; u[i].type && i < num_params; i++) { + for (; i < num_params && u[i].type; i++) { if (u[i].type == QCOMTEE_ARG_TYPE_OO || u[i].type == QCOMTEE_ARG_TYPE_IO) qcomtee_object_put(u[i].o); From a9ee2c461e5c361545f0c45e9f149159ba369c64 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Sep 2025 12:50:41 +0300 Subject: [PATCH 0002/1075] tee: qcom: return -EFAULT instead of -EINVAL if copy_from_user() fails If copy_from_user() fails, the correct error code is -EFAULT, not -EINVAL. Signed-off-by: Dan Carpenter Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/qcomtee/core.c b/drivers/tee/qcomtee/core.c index 783acc59cfa9..b6715ada7700 100644 --- a/drivers/tee/qcomtee/core.c +++ b/drivers/tee/qcomtee/core.c @@ -424,7 +424,7 @@ static int qcomtee_prepare_msg(struct qcomtee_object_invoke_ctx *oic, if (!(u[i].flags & QCOMTEE_ARG_FLAGS_UADDR)) memcpy(msgptr, u[i].b.addr, u[i].b.size); else if (copy_from_user(msgptr, u[i].b.uaddr, u[i].b.size)) - return -EINVAL; + return -EFAULT; offset += qcomtee_msg_offset_align(u[i].b.size); ib++; From f12b69d8f22824a07f17c1399c99757072de73e0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 27 Sep 2025 19:39:08 +0200 Subject: [PATCH 0003/1075] batman-adv: Release references to inactive interfaces Trying to dump the originators or the neighbors via netlink for a meshif with an inactive primary interface is not allowed. The dump functions were checking this correctly but they didn't handle non-existing primary interfaces and existing _inactive_ interfaces differently. (Primary) batadv_hard_ifaces hold a references to a net_device. And accessing them is only allowed when either being in a RCU/spinlock protected section or when holding a valid reference to them. The netlink dump functions use the latter. But because the missing specific error handling for inactive primary interfaces, the reference was never dropped. This reference counting error was only detected when the interface should have been removed from the system: unregister_netdevice: waiting for batadv_slave_0 to become free. Usage count = 2 Cc: stable@vger.kernel.org Fixes: 6ecc4fd6c2f4 ("batman-adv: netlink: reduce duplicate code by returning interfaces") Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a464ff96b929..ed89d7fd1e7f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -764,11 +764,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); @@ -1333,11 +1338,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); From 3b63efa21bc6acc1a0fadd1dd0f0e1988a4c0177 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Sep 2025 16:44:27 +0200 Subject: [PATCH 0004/1075] tee: QCOMTEE should depend on ARCH_QCOM The Qualcomm Trusted Execution Environment (QTEE) is only available on Qualcomm SoCs. Hence add a dependency on ARCH_QCOM, to prevent asking the user about this driver when configuring a kernel without Qualcomm platform support. Fixes: d6e290837e50f73f ("tee: add Qualcomm TEE driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Konrad Dybcio Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/qcomtee/Kconfig b/drivers/tee/qcomtee/Kconfig index 927686abceb1..9f19dee08db4 100644 --- a/drivers/tee/qcomtee/Kconfig +++ b/drivers/tee/qcomtee/Kconfig @@ -2,6 +2,7 @@ # Qualcomm Trusted Execution Environment Configuration config QCOMTEE tristate "Qualcomm TEE Support" + depends on ARCH_QCOM || COMPILE_TEST depends on !CPU_BIG_ENDIAN select QCOM_SCM select QCOM_TZMEM_MODE_SHMBRIDGE From 4092fc5f35cecb01d59b2cdf7740b203eac6948a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 1 Oct 2025 19:31:12 +0100 Subject: [PATCH 0005/1075] spi: dt-bindings: cadence: add soc-specific compatible strings for zynqmp and versal-net When the binding for the Cadence spi controller was written, a dedicated compatible was added for the zynq device. Later when zynqmp and versal-net, which also use this spi controller IP, were added they did not receive soc-specific compatibles. Add them now, with a fallback to the existing compatible for the r1p6 version of the IP so that there will be no functional change. Retain the r1p6 in the string, to match what was done for zynq. Disallow the cdns,spi-r1p6 compatible in isolation to "encourage" people to actually add soc-specific compatible strings in the future. Signed-off-by: Conor Dooley Acked-by: Michal Simek Link: https://patch.msgid.link/20251001-basics-grafting-a1a214ef65ac@spud Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-cadence.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml index 8de96abe9da1..27414b78d61d 100644 --- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml +++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml @@ -14,9 +14,14 @@ allOf: properties: compatible: - enum: - - cdns,spi-r1p6 - - xlnx,zynq-spi-r1p6 + oneOf: + - enum: + - xlnx,zynq-spi-r1p6 + - items: + - enum: + - xlnx,zynqmp-spi-r1p6 + - xlnx,versal-net-spi-r1p6 + - const: cdns,spi-r1p6 reg: maxItems: 1 From 93a4b36ef3cf4ce5e6a7e7a7686181de76e246a1 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Fri, 3 Oct 2025 17:15:55 +0530 Subject: [PATCH 0006/1075] cgroup: Fix seqcount lockdep assertion in cgroup freezer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") introduced a seqcount to track freeze timing but initialized it as a plain seqcount_t using seqcount_init(). However, the write-side critical section in cgroup_do_freeze() holds the css_set_lock spinlock while calling write_seqcount_begin(). On PREEMPT_RT kernels, spinlocks do not disable preemption, causing the lockdep assertion for a plain seqcount_t, which checks for preemption being disabled, to fail. This triggers the following warning: WARNING: CPU: 0 PID: 9692 at include/linux/seqlock.h:221 Fix this by changing the type to seqcount_spinlock_t and initializing it with seqcount_spinlock_init() to associate css_set_lock with the seqcount. This allows lockdep to correctly validate that the spinlock is held during write operations, resolving the assertion failure on all kernel configurations. Reported-by: syzbot+27a2519eb4dad86d0156@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=27a2519eb4dad86d0156 Fixes: afa3701c0e45 ("cgroup: cgroup.stat.local time accounting") Signed-off-by: Nirbhay Sharma Link: https://lore.kernel.org/r/20251002165510.KtY3IT--@linutronix.de/ Acked-by: Michal Koutný Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- kernel/cgroup/cgroup.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 93318fce31f3..b760a3c470a5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -452,7 +452,7 @@ struct cgroup_freezer_state { int nr_frozen_tasks; /* Freeze time data consistency protection */ - seqcount_t freeze_seq; + seqcount_spinlock_t freeze_seq; /* * Most recent time the cgroup was requested to freeze. diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6ae5f48cf64e..fdee387f0d6b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5892,7 +5892,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; - seqcount_init(&cgrp->freezer.freeze_seq); + seqcount_spinlock_init(&cgrp->freezer.freeze_seq, &css_set_lock); if (cgrp->freezer.e_freeze) { /* * Set the CGRP_FREEZE flag, so when a process will be From 48b77733d0dbaf8cd0a122712072f92b2d95d894 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Oct 2025 15:19:07 +0200 Subject: [PATCH 0007/1075] expfs: Fix exportfs_can_encode_fh() for EXPORT_FH_FID After commit 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") we will fail to create non-decodable file handles for filesystems without export operations. Fix it. Fixes: 5402c4d4d200 ("exportfs: require ->fh_to_parent() to encode connectable file handles") Reviewed-by: Christian Brauner Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/exportfs.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index d0cf10d5e0f7..f0cf2714ec52 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -320,9 +320,6 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop) static inline bool exportfs_can_encode_fh(const struct export_operations *nop, int fh_flags) { - if (!nop) - return false; - /* * If a non-decodeable file handle was requested, we only need to make * sure that filesystem did not opt-out of encoding fid. @@ -330,6 +327,10 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop, if (fh_flags & EXPORT_FH_FID) return exportfs_can_encode_fid(nop); + /* Normal file handles cannot be created without export ops */ + if (!nop) + return false; + /* * If a connectable file handle was requested, we need to make sure that * filesystem can also decode connected file handles. From ee795e82e10197c070efd380dc9615c73dffad6c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 3 Oct 2025 13:42:39 +0200 Subject: [PATCH 0008/1075] spi: rockchip-sfc: Fix DMA-API usage Use DMA-API dma_map_single() call for getting the DMA address of the transfer buffer instead of hacking with virt_to_phys(). This fixes the following DMA-API debug warning: ------------[ cut here ]------------ DMA-API: rockchip-sfc fe300000.spi: device driver tries to sync DMA memory it has not allocated [device address=0x000000000cf70000] [size=288 bytes] WARNING: kernel/dma/debug.c:1106 at check_sync+0x1d8/0x690, CPU#2: systemd-udevd/151 Modules linked in: ... Hardware name: Hardkernel ODROID-M1 (DT) pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : check_sync+0x1d8/0x690 lr : check_sync+0x1d8/0x690 .. Call trace: check_sync+0x1d8/0x690 (P) debug_dma_sync_single_for_cpu+0x84/0x8c __dma_sync_single_for_cpu+0x88/0x234 rockchip_sfc_exec_mem_op+0x4a0/0x798 [spi_rockchip_sfc] spi_mem_exec_op+0x408/0x498 spi_nor_read_data+0x170/0x184 spi_nor_read_sfdp+0x74/0xe4 spi_nor_parse_sfdp+0x120/0x11f0 spi_nor_sfdp_init_params_deprecated+0x3c/0x8c spi_nor_scan+0x690/0xf88 spi_nor_probe+0xe4/0x304 spi_mem_probe+0x6c/0xa8 spi_probe+0x94/0xd4 really_probe+0xbc/0x298 ... Fixes: b69386fcbc60 ("spi: rockchip-sfc: Using normal memory for dma") Signed-off-by: Marek Szyprowski Link: https://patch.msgid.link/20251003114239.431114-1-m.szyprowski@samsung.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip-sfc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c index 9eba5c0a60f2..b3c2b03b1153 100644 --- a/drivers/spi/spi-rockchip-sfc.c +++ b/drivers/spi/spi-rockchip-sfc.c @@ -704,7 +704,12 @@ static int rockchip_sfc_probe(struct platform_device *pdev) ret = -ENOMEM; goto err_dma; } - sfc->dma_buffer = virt_to_phys(sfc->buffer); + sfc->dma_buffer = dma_map_single(dev, sfc->buffer, + sfc->max_iosize, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, sfc->dma_buffer)) { + ret = -ENOMEM; + goto err_dma_map; + } } ret = devm_spi_register_controller(dev, host); @@ -715,6 +720,9 @@ static int rockchip_sfc_probe(struct platform_device *pdev) return 0; err_register: + dma_unmap_single(dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); +err_dma_map: free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); err_dma: pm_runtime_get_sync(dev); @@ -736,6 +744,8 @@ static void rockchip_sfc_remove(struct platform_device *pdev) struct spi_controller *host = sfc->host; spi_unregister_controller(host); + dma_unmap_single(&pdev->dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); clk_disable_unprepare(sfc->clk); From a7c4bb43bfdc2b9f06ee9d036028ed13a83df42a Mon Sep 17 00:00:00 2001 From: Jakub Acs Date: Wed, 1 Oct 2025 10:09:55 +0000 Subject: [PATCH 0009/1075] fs/notify: call exportfs_encode_fid with s_umount Calling intotify_show_fdinfo() on fd watching an overlayfs inode, while the overlayfs is being unmounted, can lead to dereferencing NULL ptr. This issue was found by syzkaller. Race Condition Diagram: Thread 1 Thread 2 -------- -------- generic_shutdown_super() shrink_dcache_for_umount sb->s_root = NULL | | vfs_read() | inotify_fdinfo() | * inode get from mark * | show_mark_fhandle(m, inode) | exportfs_encode_fid(inode, ..) | ovl_encode_fh(inode, ..) | ovl_check_encode_origin(inode) | * deref i_sb->s_root * | | v fsnotify_sb_delete(sb) Which then leads to: [ 32.133461] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN NOPTI [ 32.134438] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] [ 32.135032] CPU: 1 UID: 0 PID: 4468 Comm: systemd-coredum Not tainted 6.17.0-rc6 #22 PREEMPT(none) [ 32.143353] Call Trace: [ 32.143732] ovl_encode_fh+0xd5/0x170 [ 32.144031] exportfs_encode_inode_fh+0x12f/0x300 [ 32.144425] show_mark_fhandle+0xbe/0x1f0 [ 32.145805] inotify_fdinfo+0x226/0x2d0 [ 32.146442] inotify_show_fdinfo+0x1c5/0x350 [ 32.147168] seq_show+0x530/0x6f0 [ 32.147449] seq_read_iter+0x503/0x12a0 [ 32.148419] seq_read+0x31f/0x410 [ 32.150714] vfs_read+0x1f0/0x9e0 [ 32.152297] ksys_read+0x125/0x240 IOW ovl_check_encode_origin derefs inode->i_sb->s_root, after it was set to NULL in the unmount path. Fix it by protecting calling exportfs_encode_fid() from show_mark_fhandle() with s_umount lock. This form of fix was suggested by Amir in [1]. [1]: https://lore.kernel.org/all/CAOQ4uxhbDwhb+2Brs1UdkoF0a3NSdBAOQPNfEHjahrgoKJpLEw@mail.gmail.com/ Fixes: c45beebfde34 ("ovl: support encoding fid from inode with no alias") Signed-off-by: Jakub Acs Cc: Jan Kara Cc: Amir Goldstein Cc: Miklos Szeredi Cc: Christian Brauner Cc: linux-unionfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/notify/fdinfo.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index 1161eabf11ee..9cc7eb863643 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -17,6 +17,7 @@ #include "fanotify/fanotify.h" #include "fdinfo.h" #include "fsnotify.h" +#include "../internal.h" #if defined(CONFIG_PROC_FS) @@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode) size = f->handle_bytes >> 2; + if (!super_trylock_shared(inode->i_sb)) + return; + ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size); + up_read(&inode->i_sb->s_umount); + if ((ret == FILEID_INVALID) || (ret < 0)) return; From 2e9c1da4ee9d0acfca2e0a3d78f3d8cb5802da1b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 26 Sep 2025 21:56:56 +0200 Subject: [PATCH 0010/1075] wifi: ath10k: Fix memory leak on unsupported WMI command ath10k_wmi_cmd_send takes ownership of the passed buffer (skb) and has the responsibility to release it in case of error. This patch fixes missing free in case of early error due to unhandled WMI command ID. Tested-on: WCN3990 hw1.0 WLAN.HL.3.3.7.c2-00931-QCAHLSWMTPLZ-1 Fixes: 553215592f14 ("ath10k: warn if give WMI command is not supported") Suggested-by: Jeff Johnson Signed-off-by: Loic Poulain Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250926195656.187970-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index cb8ae751eb31..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1938,6 +1938,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (cmd_id == WMI_CMD_UNSUPPORTED) { ath10k_warn(ar, "wmi command %d is not supported by firmware\n", cmd_id); + dev_kfree_skb_any(skb); return ret; } From 0eb002c93c3b47f88244cecb1e356eaeab61a6bf Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 29 Sep 2025 15:21:35 -0400 Subject: [PATCH 0011/1075] wifi: ath11k: Add missing platform IDs for quirk table Lenovo platforms can come with one of two different IDs. The pm_quirk table was missing the second ID for each platform. Add missing ID and some extra platform identification comments. Reported on https://bugzilla.kernel.org/show_bug.cgi?id=219196 Tested-on: P14s G4 AMD. Fixes: ce8669a27016 ("wifi: ath11k: determine PM policy based on machine model") Signed-off-by: Mark Pearson Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219196 Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250929192146.1789648-1-mpearson-lenovo@squebb.ca Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 54 +++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index d49353b6b2e7..e9618432cd2f 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -912,42 +912,84 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { static const struct dmi_system_id ath11k_pm_quirk_table[] = { { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* X13 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* X13 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21J4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K5"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K6"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T16 G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K7"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T16 G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K8"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P16s G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K9"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P16s G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21KA"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21F8"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21F9"), }, From 92282074e1d2e7b6da5c05fe38a7cc974187fe14 Mon Sep 17 00:00:00 2001 From: Karthik M Date: Tue, 23 Sep 2025 15:03:16 -0700 Subject: [PATCH 0012/1075] wifi: ath12k: free skb during idr cleanup callback ath12k just like ath11k [1] did not handle skb cleanup during idr cleanup callback. Both ath12k_mac_vif_txmgmt_idr_remove() and ath12k_mac_tx_mgmt_pending_free() performed idr cleanup and DMA unmapping for skb but only ath12k_mac_tx_mgmt_pending_free() freed skb. As a result, during vdev deletion a memory leak occurs. Refactor all clean up steps into a new function. New function ath12k_mac_tx_mgmt_free() creates a centralized area where idr cleanup, DMA unmapping for skb and freeing skb is performed. Utilize skb pointer given by idr_remove(), instead of passed as a function argument because IDR will be protected by locking. This will prevent concurrent modification of the same IDR. Now ath12k_mac_tx_mgmt_pending_free() and ath12k_mac_vif_txmgmt_idr_remove() call ath12k_mac_tx_mgmt_free(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Link: https://lore.kernel.org/r/1637832614-13831-1-git-send-email-quic_srirrama@quicinc.com > # [1] Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Karthik M Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250923220316.1595758-1-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 3a3965b79942..2fad2df1d6ce 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8304,23 +8304,32 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb) wake_up(&ar->txmgmt_empty_waitq); } -int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +static void ath12k_mac_tx_mgmt_free(struct ath12k *ar, int buf_id) { - struct sk_buff *msdu = skb; + struct sk_buff *msdu; struct ieee80211_tx_info *info; - struct ath12k *ar = ctx; - struct ath12k_base *ab = ar->ab; spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); + msdu = idr_remove(&ar->txmgmt_idr, buf_id); spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, + + if (!msdu) + return; + + dma_unmap_single(ar->ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); - ath12k_mgmt_over_wmi_tx_drop(ar, skb); + ath12k_mgmt_over_wmi_tx_drop(ar, msdu); +} + +int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +{ + struct ath12k *ar = ctx; + + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -8329,17 +8338,10 @@ static int ath12k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx) { struct ieee80211_vif *vif = ctx; struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); - struct sk_buff *msdu = skb; struct ath12k *ar = skb_cb->ar; - struct ath12k_base *ab = ar->ab; - if (skb_cb->vif == vif) { - spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); - spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, - DMA_TO_DEVICE); - } + if (skb_cb->vif == vif) + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } From 9c78e747dd4fee6c36fcc926212e20032055cf9d Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Fri, 3 Oct 2025 14:51:58 +0530 Subject: [PATCH 0013/1075] wifi: ath11k: avoid bit operation on key flags Bitwise operations with WMI_KEY_PAIRWISE (defined as 0) are ineffective and misleading. This results in pairwise key validations added in commit 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") to always evaluate false and clear key commands for pairwise keys are not honored. Since firmware supports overwriting the new key without explicitly clearing the previous one, there is no visible impact currently. However, to restore consistency with the previous behavior and improve clarity, replace bitwise operations with direct assignments and comparisons for key flags. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aLlaetkalDvWcB7b@stanley.mountain Fixes: 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251003092158.1080637-1-rameshkumar.sundaram@oss.qualcomm.com [update copyright per current guidance] Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 106e2530b64e..0e41b5a91d66 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -4417,9 +4417,9 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) - flags |= WMI_KEY_PAIRWISE; + flags = WMI_KEY_PAIRWISE; else - flags |= WMI_KEY_GROUP; + flags = WMI_KEY_GROUP; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", @@ -4456,7 +4456,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && !arvif->num_stations); - if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + if (flags == WMI_KEY_PAIRWISE || cmd == SET_KEY || is_ap_with_no_sta) { ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); @@ -4470,7 +4470,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, goto exit; } - if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + if (flags == WMI_KEY_GROUP && cmd == SET_KEY && is_ap_with_no_sta) arvif->reinstall_group_keys = true; } From c34e08ba6c0037a72a7433741225b020c989e4ae Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 23 Sep 2025 07:04:40 -0700 Subject: [PATCH 0014/1075] drm/msm: Fix GEM free for imported dma-bufs Imported dma-bufs also have obj->resv != &obj->_resv. So we should check both this condition in addition to flags for handling the _NO_SHARE case. Fixes this splat that was reported with IRIS video playback: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 2040 at drivers/gpu/drm/msm/msm_gem.c:1127 msm_gem_free_object+0x1f8/0x264 [msm] CPU: 3 UID: 1000 PID: 2040 Comm: .gnome-shell-wr Not tainted 6.17.0-rc7 #1 PREEMPT pstate: 81400005 (Nzcv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : msm_gem_free_object+0x1f8/0x264 [msm] lr : msm_gem_free_object+0x138/0x264 [msm] sp : ffff800092a1bb30 x29: ffff800092a1bb80 x28: ffff800092a1bce8 x27: ffffbc702dbdbe08 x26: 0000000000000008 x25: 0000000000000009 x24: 00000000000000a6 x23: ffff00083c72f850 x22: ffff00083c72f868 x21: ffff00087e69f200 x20: ffff00087e69f330 x19: ffff00084d157ae0 x18: 0000000000000000 x17: 0000000000000000 x16: ffffbc704bd46b80 x15: 0000ffffd0959540 x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: ffffbc702e6cdb48 x10: 0000000000000000 x9 : 000000000000003f x8 : ffff800092a1ba90 x7 : 0000000000000000 x6 : 0000000000000020 x5 : ffffbc704bd46c40 x4 : fffffdffe102cf60 x3 : 0000000000400032 x2 : 0000000000020000 x1 : ffff00087e6978e8 x0 : ffff00087e6977e8 Call trace: msm_gem_free_object+0x1f8/0x264 [msm] (P) drm_gem_object_free+0x1c/0x30 [drm] drm_gem_object_handle_put_unlocked+0x138/0x150 [drm] drm_gem_object_release_handle+0x5c/0xcc [drm] drm_gem_handle_delete+0x68/0xbc [drm] drm_gem_close_ioctl+0x34/0x40 [drm] drm_ioctl_kernel+0xc0/0x130 [drm] drm_ioctl+0x360/0x4e0 [drm] __arm64_sys_ioctl+0xac/0x104 invoke_syscall+0x48/0x104 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xec el0t_64_sync_handler+0xa0/0xe4 el0t_64_sync+0x198/0x19c ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ Reported-by: Stephan Gerhold Fixes: de651b6e040b ("drm/msm: Fix refcnt underflow in error path") Signed-off-by: Rob Clark Tested-by: Stephan Gerhold Tested-by: Luca Weiss Tested-by: Bryan O'Donoghue # qrb5165-rb5 Patchwork: https://patchwork.freedesktop.org/patch/676273/ Message-ID: <20250923140441.746081-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/msm_gem.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e6cb51f21d97..a9d21a7b8984 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1121,12 +1121,16 @@ static void msm_gem_free_object(struct drm_gem_object *obj) put_pages(obj); } - if (obj->resv != &obj->_resv) { + /* + * In error paths, we could end up here before msm_gem_new_handle() + * has changed obj->resv to point to the shared resv. In this case, + * we don't want to drop a ref to the shared r_obj that we haven't + * taken yet. + */ + if ((msm_obj->flags & MSM_BO_NO_SHARE) && (obj->resv != &obj->_resv)) { struct drm_gem_object *r_obj = container_of(obj->resv, struct drm_gem_object, _resv); - WARN_ON(!(msm_obj->flags & MSM_BO_NO_SHARE)); - /* Drop reference we hold to shared resv obj: */ drm_gem_object_put(r_obj); } From b69ffeaa0ae43892683113b3f4ddf156398738b9 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 1 Oct 2025 22:05:30 -0700 Subject: [PATCH 0015/1075] scsi: storvsc: Prefer returning channel with the same CPU as on the I/O issuing CPU When selecting an outgoing channel for I/O, storvsc tries to select a channel with a returning CPU that is not the same as issuing CPU. This worked well in the past, however it doesn't work well when the Hyper-V exposes a large number of channels (up to the number of all CPUs). Use a different CPU for returning channel is not efficient on Hyper-V. Change this behavior by preferring to the channel with the same CPU as the current I/O issuing CPU whenever possible. Tests have shown improvements in newer Hyper-V/Azure environment, and no regression with older Hyper-V/Azure environments. Tested-by: Raheel Abdul Faizy Signed-off-by: Long Li Message-Id: <1759381530-7414-1-git-send-email-longli@linux.microsoft.com> Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 98 ++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7449743930d2..7fb57dca86e2 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1406,14 +1406,19 @@ static struct vmbus_channel *get_og_chn(struct storvsc_device *stor_device, } /* - * Our channel array is sparsley populated and we + * Our channel array could be sparsley populated and we * initiated I/O on a processor/hw-q that does not * currently have a designated channel. Fix this. * The strategy is simple: - * I. Ensure NUMA locality - * II. Distribute evenly (best effort) + * I. Prefer the channel associated with the current CPU + * II. Ensure NUMA locality + * III. Distribute evenly (best effort) */ + /* Prefer the channel on the I/O issuing processor/hw-q */ + if (cpumask_test_cpu(q_num, &stor_device->alloced_cpus)) + return stor_device->stor_chns[q_num]; + node_mask = cpumask_of_node(cpu_to_node(q_num)); num_channels = 0; @@ -1469,59 +1474,48 @@ static int storvsc_do_io(struct hv_device *device, /* See storvsc_change_target_cpu(). */ outgoing_channel = READ_ONCE(stor_device->stor_chns[q_num]); if (outgoing_channel != NULL) { - if (outgoing_channel->target_cpu == q_num) { - /* - * Ideally, we want to pick a different channel if - * available on the same NUMA node. - */ - node_mask = cpumask_of_node(cpu_to_node(q_num)); - for_each_cpu_wrap(tgt_cpu, - &stor_device->alloced_cpus, q_num + 1) { - if (!cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - if (tgt_cpu == q_num) - continue; - channel = READ_ONCE( - stor_device->stor_chns[tgt_cpu]); - if (channel == NULL) - continue; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } - } + if (hv_get_avail_to_write_percent(&outgoing_channel->outbound) + > ring_avail_percent_lowater) + goto found_channel; - /* - * All the other channels on the same NUMA node are - * busy. Try to use the channel on the current CPU - */ - if (hv_get_avail_to_write_percent( - &outgoing_channel->outbound) - > ring_avail_percent_lowater) + /* + * Channel is busy, try to find a channel on the same NUMA node + */ + node_mask = cpumask_of_node(cpu_to_node(q_num)); + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + if (!cpumask_test_cpu(tgt_cpu, node_mask)) + continue; + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; goto found_channel; - - /* - * If we reach here, all the channels on the current - * NUMA node are busy. Try to find a channel in - * other NUMA nodes - */ - for_each_cpu(tgt_cpu, &stor_device->alloced_cpus) { - if (cpumask_test_cpu(tgt_cpu, node_mask)) - continue; - channel = READ_ONCE( - stor_device->stor_chns[tgt_cpu]); - if (channel == NULL) - continue; - if (hv_get_avail_to_write_percent( - &channel->outbound) - > ring_avail_percent_lowater) { - outgoing_channel = channel; - goto found_channel; - } } } + + /* + * If we reach here, all the channels on the current + * NUMA node are busy. Try to find a channel in + * all NUMA nodes + */ + for_each_cpu_wrap(tgt_cpu, &stor_device->alloced_cpus, + q_num + 1) { + channel = READ_ONCE(stor_device->stor_chns[tgt_cpu]); + if (!channel) + continue; + if (hv_get_avail_to_write_percent(&channel->outbound) + > ring_avail_percent_lowater) { + outgoing_channel = channel; + goto found_channel; + } + } + /* + * If we reach here, all the channels are busy. Use the + * original channel found. + */ } else { spin_lock_irqsave(&stor_device->lock, flags); outgoing_channel = stor_device->stor_chns[q_num]; From 987da233b2982c686a8ea5cd4c76f0bd5e957ee3 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 29 Sep 2025 02:25:54 -0700 Subject: [PATCH 0016/1075] scsi: qla4xxx: Fix typos in comments Fix several spelling mistakes in qla4xxx driver comments: "Unfortunely" -> "Unfortunately" "becase" -> "because" "funcions" -> "functions" "targer_id" -> "target_id" Signed-off-by: Alok Tiwari Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index a39f1da4ce47..e4ab541561d0 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4104,7 +4104,7 @@ void qla4xxx_srb_compl(struct kref *ref) * The mid-level driver tries to ensure that queuecommand never gets * invoked concurrently with itself or the interrupt handler (although * the interrupt handler may call this routine as part of request- - * completion handling). Unfortunely, it sometimes calls the scheduler + * completion handling). Unfortunately, it sometimes calls the scheduler * in interrupt context which is a big NO! NO!. **/ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) @@ -4647,7 +4647,7 @@ static int qla4xxx_cmd_wait(struct scsi_qla_host *ha) cmd = scsi_host_find_tag(ha->host, index); /* * We cannot just check if the index is valid, - * becase if we are run from the scsi eh, then + * because if we are run from the scsi eh, then * the scsi/block layer is going to prevent * the tag from being released. */ @@ -4952,7 +4952,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) /* Upon successful firmware/chip reset, re-initialize the adapter */ if (status == QLA_SUCCESS) { /* For ISP-4xxx, force function 1 to always initialize - * before function 3 to prevent both funcions from + * before function 3 to prevent both functions from * stepping on top of the other */ if (is_qla40XX(ha) && (ha->mac_index == 3)) ssleep(6); @@ -6912,7 +6912,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct ddb_entry *ddb_entry = NULL; /* Create session object, with INVALID_ENTRY, - * the targer_id would get set when we issue the login + * the target_id would get set when we issue the login */ cls_sess = iscsi_session_setup(&qla4xxx_iscsi_transport, ha->host, cmds_max, sizeof(struct ddb_entry), From 120642726ecb1b7a266f5c21bec90821e1154509 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Sep 2025 15:38:09 +0300 Subject: [PATCH 0017/1075] scsi: libfc: Prevent integer overflow in fc_fcp_recv_data() The "offset" comes from the skb->data that we received. Here the code is verifying that "offset + len" is within bounds however it does not take integer overflows into account. Use size_add() to be safe. This would only be an issue on 32bit systems which are probably a very small percent of the users. Still, it's worth fixing just for correctness sake. Fixes: 42e9a92fe6a9 ("[SCSI] libfc: A modular Fibre Channel library") Signed-off-by: Dan Carpenter Message-Id: Signed-off-by: Martin K. Petersen --- drivers/scsi/libfc/fc_fcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 16d0f02af1e4..31d08c115521 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -503,7 +503,7 @@ static void fc_fcp_recv_data(struct fc_fcp_pkt *fsp, struct fc_frame *fp) host_bcode = FC_ERROR; goto err; } - if (offset + len > fsp->data_len) { + if (size_add(offset, len) > fsp->data_len) { /* this should never happen */ if ((fr_flags(fp) & FCPHF_CRC_UNCHECKED) && fc_frame_crc_check(fp)) From 6dfc353af575e33c94f5d740f7b0569fa9b784d9 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Wed, 24 Sep 2025 16:29:00 -0700 Subject: [PATCH 0018/1075] scsi: ufs: qcom: dt-bindings: Document the Kaanapali UFS controller Document the UFS Controller on the Kaanapali Platform. Signed-off-by: Nitin Rawat Signed-off-by: Jingyi Wang Message-Id: <20250924-knp-ufs-v1-1-42e0955a1f7c@oss.qualcomm.com> Signed-off-by: Martin K. Petersen --- Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml b/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml index aaa0bbb5bfe1..cea84ab2204f 100644 --- a/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml +++ b/Documentation/devicetree/bindings/ufs/qcom,sm8650-ufshc.yaml @@ -15,6 +15,7 @@ select: compatible: contains: enum: + - qcom,kaanapali-ufshc - qcom,sm8650-ufshc - qcom,sm8750-ufshc required: @@ -24,6 +25,7 @@ properties: compatible: items: - enum: + - qcom,kaanapali-ufshc - qcom,sm8650-ufshc - qcom,sm8750-ufshc - const: qcom,ufshc From 7c3321f3d279eda7f7d622312ffdbb889f3bec97 Mon Sep 17 00:00:00 2001 From: Jingyi Wang Date: Wed, 24 Sep 2025 16:29:01 -0700 Subject: [PATCH 0019/1075] scsi: ufs: phy: dt-bindings: Add QMP UFS PHY compatible for Kaanapali Document the QMP UFS PHY compatible for Qualcomm Kaanapali to support physical layer functionality for UFS found on the SoC. Use fallback to indicate the compatibility of the QMP UFS PHY on the Kaanapali with that on the SM8750. Signed-off-by: Jingyi Wang Acked-by: Konrad Dybcio Signed-off-by: Martin K. Petersen --- .../devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml index a58370a6a5d3..fba7b2549dde 100644 --- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml +++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml @@ -24,6 +24,10 @@ properties: - enum: - qcom,qcs8300-qmp-ufs-phy - const: qcom,sa8775p-qmp-ufs-phy + - items: + - enum: + - qcom,kaanapali-qmp-ufs-phy + - const: qcom,sm8750-qmp-ufs-phy - enum: - qcom,msm8996-qmp-ufs-phy - qcom,msm8998-qmp-ufs-phy From 18a5f1af596e6ba22cd40ada449063041f3ce6d4 Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Tue, 7 Oct 2025 13:11:33 +0300 Subject: [PATCH 0020/1075] spi: dw-mmio: add error handling for reset_control_deassert() Currently reset_control_deassert() is called without checking its return value. This can lead to silent failures when reset deassertion fails. Add proper error handling to: 1. Check the return value of reset_control_deassert() 2. Return the error to the caller 3. Provide meaningful error message using dev_err_probe() This ensures that reset-related failures are properly reported during probe and helps with debugging reset issues. Signed-off-by: Artem Shimko Link: https://patch.msgid.link/20251007101134.1912895-1-a.shimko.dev@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-dw-mmio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index f0f576fac77a..7a5197586919 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -358,7 +358,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev) if (IS_ERR(dwsmmio->rstc)) return PTR_ERR(dwsmmio->rstc); - reset_control_deassert(dwsmmio->rstc); + ret = reset_control_deassert(dwsmmio->rstc); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to deassert resets\n"); dws->bus_num = pdev->id; From 268eb6fb908bc82ce479e4dba9a2cad11f536c9c Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 19 Sep 2025 14:25:34 +0800 Subject: [PATCH 0021/1075] dt-bindings: usb: dwc3-imx8mp: dma-range is required only for imx8mp Only i.MX8MP need dma-range property to let USB controller work properly. Remove dma-range from required list and add limitation for imx8mp. Fixes: d2a704e29711 ("dt-bindings: usb: dwc3-imx8mp: add imx8mp dwc3 glue bindings") Cc: stable Reviewed-by: Jun Li Signed-off-by: Xu Yang Reviewed-by: Frank Li Acked-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml index baf130669c38..73e7a60a0060 100644 --- a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml @@ -89,13 +89,21 @@ required: - reg - "#address-cells" - "#size-cells" - - dma-ranges - ranges - clocks - clock-names - interrupts - power-domains +allOf: + - if: + properties: + compatible: + const: fsl,imx8mp-dwc3 + then: + required: + - dma-ranges + additionalProperties: false examples: From 2758246d287549e1088eae350654160cbf4d424f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 Sep 2025 20:28:50 +0200 Subject: [PATCH 0022/1075] usb: dwc3: Don't call clk_bulk_disable_unprepare() twice devm_clk_bulk_get_all_enabled() is used in the probe, so clk_bulk_disable_unprepare() should not be called explicitly in the remove function. Fixes: e0b6dc00c701 ("usb: dwc3: add generic driver to support flattened") Signed-off-by: Christophe JAILLET Acked-by: Thinh Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-generic-plat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-generic-plat.c b/drivers/usb/dwc3/dwc3-generic-plat.c index d96b20570002..f8ad79c08c4e 100644 --- a/drivers/usb/dwc3/dwc3-generic-plat.c +++ b/drivers/usb/dwc3/dwc3-generic-plat.c @@ -85,11 +85,8 @@ static int dwc3_generic_probe(struct platform_device *pdev) static void dwc3_generic_remove(struct platform_device *pdev) { struct dwc3 *dwc = platform_get_drvdata(pdev); - struct dwc3_generic *dwc3g = to_dwc3_generic(dwc); dwc3_core_remove(dwc); - - clk_bulk_disable_unprepare(dwc3g->num_clocks, dwc3g->clks); } static int dwc3_generic_suspend(struct device *dev) From bd8c3ce6d7a205b3ba3ef9815db4c6932290ec59 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 30 Sep 2025 19:17:21 +0200 Subject: [PATCH 0023/1075] dt-bindings: usb: switch: split out ports definition The ports definition currently defined in the usb-switch.yaml fits standards devices which are either recipient of altmode muxing and orientation switching events or an element of the USB Super Speed data lanes. This doesn't necessarely fit combo PHYs like the Qualcomm USB3/DP Combo which has a different ports representation. Move the ports definition to a separate usb-switch-ports.yaml and reference it next to the usb-switch.yaml, except for the Qualcomm USB3/DP Combo PHY bindings. Reported-by: Rob Herring Closes: https://lore.kernel.org/all/175462129176.394940.16810637795278334342.robh@kernel.org/ Fixes: 3bad7fe22796 ("dt-bindings: phy: qcom,sc8280xp-qmp-usb43dp: Reference usb-switch.yaml to allow mode-switch") Signed-off-by: Neil Armstrong Reviewed-by: Rob Herring (Arm) Signed-off-by: Greg Kroah-Hartman --- .../bindings/phy/fsl,imx8mq-usb-phy.yaml | 4 +- .../bindings/phy/samsung,usb3-drd-phy.yaml | 4 +- .../devicetree/bindings/usb/fcs,fsa4480.yaml | 1 + .../devicetree/bindings/usb/gpio-sbu-mux.yaml | 1 + .../devicetree/bindings/usb/nxp,ptn36502.yaml | 1 + .../bindings/usb/onnn,nb7vpq904m.yaml | 1 + .../bindings/usb/parade,ps8830.yaml | 1 + .../bindings/usb/qcom,wcd939x-usbss.yaml | 1 + .../devicetree/bindings/usb/ti,tusb1046.yaml | 1 + .../bindings/usb/usb-switch-ports.yaml | 68 +++++++++++++++++++ .../devicetree/bindings/usb/usb-switch.yaml | 52 -------------- 11 files changed, 81 insertions(+), 54 deletions(-) create mode 100644 Documentation/devicetree/bindings/usb/usb-switch-ports.yaml diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 6a47e08e0e97..f9cffbb2df07 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -142,7 +142,9 @@ allOf: required: - orientation-switch then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index e906403208c0..ea1135c91fb7 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -125,7 +125,9 @@ allOf: contains: const: google,gs101-usb31drd-phy then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# properties: clocks: diff --git a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml index e3a7df91f7f1..89b1fb90aeeb 100644 --- a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml +++ b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml @@ -76,6 +76,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml index e588514fab2d..793662f6f3bf 100644 --- a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml +++ b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml @@ -52,6 +52,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# - if: required: - mode-switch diff --git a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml index d805dde80796..4d2fcaa71870 100644 --- a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml +++ b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml @@ -46,6 +46,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml index 589914d22bf2..25fab5fdc2cd 100644 --- a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml +++ b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml @@ -91,6 +91,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml index aeb33667818e..eaeab1c01a59 100644 --- a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml +++ b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml @@ -81,6 +81,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml index 96346723f3e9..96dcec9b7620 100644 --- a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml @@ -60,6 +60,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml index f713cac4a8ac..e1501ea6b50b 100644 --- a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml +++ b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml new file mode 100644 index 000000000000..6bf0c97e30ae --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/usb-switch-ports.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: USB Orientation and Mode Switches Ports Graph Properties + +maintainers: + - Greg Kroah-Hartman + +description: + Ports Graph properties for devices handling USB mode and orientation switching. + +properties: + port: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + A port node to link the device to a TypeC controller for the purpose of + handling altmode muxing and orientation switching. + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Super Speed (SS) Output endpoint to the Type-C connector + + port@1: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + Super Speed (SS) Input endpoint from the Super-Speed PHY + unevaluatedProperties: false + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + +oneOf: + - required: + - port + - required: + - ports + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/usb/usb-switch.yaml b/Documentation/devicetree/bindings/usb/usb-switch.yaml index 896201912630..f77731493dc4 100644 --- a/Documentation/devicetree/bindings/usb/usb-switch.yaml +++ b/Documentation/devicetree/bindings/usb/usb-switch.yaml @@ -25,56 +25,4 @@ properties: description: Possible handler of SuperSpeed signals retiming type: boolean - port: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - A port node to link the device to a TypeC controller for the purpose of - handling altmode muxing and orientation switching. - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - - ports: - $ref: /schemas/graph.yaml#/properties/ports - properties: - port@0: - $ref: /schemas/graph.yaml#/properties/port - description: - Super Speed (SS) Output endpoint to the Type-C connector - - port@1: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - Super Speed (SS) Input endpoint from the Super-Speed PHY - unevaluatedProperties: false - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - -oneOf: - - required: - - port - - required: - - ports - additionalProperties: true From dddc0f71485f1f29f236e387632181bcc09019a0 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 6 Oct 2025 22:39:52 +0100 Subject: [PATCH 0024/1075] usb: misc: Add x86 dependency for Intel USBIO driver The Intel USBIO driver is x86 only, other architectures have ACPI so add an appropriate depenecy plus compile test. Fixes: 121a0f839dbb3 ("usb: misc: Add Intel USBIO bridge driver") Signed-off-by: Peter Robinson Reviewed-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 09ac6f1c985f..0b56b773dbdf 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -182,6 +182,7 @@ config USB_LJCA config USB_USBIO tristate "Intel USBIO Bridge support" depends on USB && ACPI + depends on X86 || COMPILE_TEST select AUXILIARY_BUS help This adds support for Intel USBIO drivers. From 51cb04abd39097209b871e95ffa7e8584ce7dcba Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 13 Oct 2025 09:29:20 +0530 Subject: [PATCH 0025/1075] dt-bindings: usb: qcom,snps-dwc3: Fix bindings for X1E80100 Add the missing multiport controller binding to target list. Fix minItems for interrupt-names to avoid the following error on High Speed controller: usb@a200000: interrupt-names: ['dwc_usb3', 'pwr_event', 'dp_hs_phy_irq', 'dm_hs_phy_irq'] is too short Fixes: 6e762f7b8edc ("dt-bindings: usb: Introduce qcom,snps-dwc3") Cc: stable@vger.kernel.org Signed-off-by: Krishna Kurapati Reviewed-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml index dfd084ed9024..d49a58d5478f 100644 --- a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml @@ -68,6 +68,7 @@ properties: - qcom,sm8550-dwc3 - qcom,sm8650-dwc3 - qcom,x1e80100-dwc3 + - qcom,x1e80100-dwc3-mp - const: qcom,snps-dwc3 reg: @@ -460,8 +461,10 @@ allOf: then: properties: interrupts: + minItems: 4 maxItems: 5 interrupt-names: + minItems: 4 items: - const: dwc_usb3 - const: pwr_event From d3c4c1f29aadccf2f43530bfa1e60a6d8030fd4a Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 11:18:18 +0200 Subject: [PATCH 0026/1075] staging: gpib: Fix no EOI on 1 and 2 byte writes EOI (End Or Identify) is a hardware line on the GPIB bus that can be asserted with the last byte of a message to indicate the end of the transfer to the receiving device. In this driver, a write with send_eoi true is done in 3 parts: Send first byte directly Send remaining but 1 bytes using the fifo Send the last byte directly with EOI asserted The first byte in a write is always sent by writing to the tms9914 chip directly to setup for the subsequent fifo transfer. We were not checking for a 1 byte write with send_eoi true resulting in EOI not being asserted. Since the fifo transfer was not executed (fifotransfersize == 0) the retval in the test after the fifo transfer code was still 1 from the preceding direct write. This caused it to return without executing the final direct write which would have sent an unsollicited extra byte. For a 2 byte message the first byte was sent directly. But since the fifo transfer was not executed (fifotransfersize == 1) and the retval in the test after the fifo transfer code was still 1 from the preceding first byte write it returned before the final direct byte write with send_eoi true. The second byte was then sent as a separate 1 byte write to complete the 2 byte write count again without EOI being asserted as above. Only send the first byte directly if more than 1 byte is to be transferred with send_eoi true. Also check for retval < 0 for the error return in case the fifo code is not used (1 or 2 byte message with send_eoi true). Fixes: 09a4655ee1eb ("staging: gpib: Add HP/Agilent/Keysight 8235xx PCI GPIB driver") Cc: stable Tested-by: Dave Penkler Signed-off-by: Dave Penkler Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 94bbb3b6576d..01a5bb43cd2d 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -182,10 +182,12 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, return retval; #endif - retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); - *bytes_written += num_bytes; - if (retval < 0) - return retval; + if (fifotransferlength > 0) { + retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); + *bytes_written += num_bytes; + if (retval < 0) + return retval; + } write_byte(tms_priv, tms_priv->imr0_bits & ~HR_BOIE, IMR0); for (i = 1; i < fifotransferlength;) { @@ -217,7 +219,7 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, break; } write_byte(tms_priv, tms_priv->imr0_bits, IMR0); - if (retval) + if (retval < 0) return retval; if (send_eoi) { From 92a2b74a6b5a5d9b076cd9aa75e63c6461cbd073 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 13:33:58 +0200 Subject: [PATCH 0027/1075] staging: gpib: Fix sending clear and trigger events This driver was not sending device clear or trigger events when the board entered the DCAS or DTAS state respectively in device mode. DCAS is the Device Clear Active State which is entered on receiving a selective device clear message (SDC) or universal device clear message (DCL) from the controller in charge. DTAS is the Device Trigger Active State which is entered on receiving a group execute trigger (GET) message from the controller. In order for an application, implementing a particular device, to detect when one of these states is entered the driver needs to send the appropriate event. Send the appropriate gpib_event when DCAS or DTAS is set in the reported status word. This sets the DCAS or DTAS bits in the board's status word which can be monitored by the application. Fixes: 4e127de14fa7 ("staging: gpib: Add National Instruments USB GPIB driver") Cc: stable Tested-by: Dave Penkler Signed-off-by: Dave Penkler Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 4dec87d12687..ea44a766fda2 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -327,7 +327,10 @@ static void ni_usb_soft_update_status(struct gpib_board *board, unsigned int ni_ board->status &= ~clear_mask; board->status &= ~ni_usb_ibsta_mask; board->status |= ni_usb_ibsta & ni_usb_ibsta_mask; - // FIXME should generate events on DTAS and DCAS + if (ni_usb_ibsta & DCAS) + push_gpib_event(board, EVENT_DEV_CLR); + if (ni_usb_ibsta & DTAS) + push_gpib_event(board, EVENT_DEV_TRG); spin_lock_irqsave(&board->spinlock, flags); /* remove set status bits from monitored set why ?***/ From aaf2af1ed147ef49be65afb541a67255e9f60d15 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 13:33:59 +0200 Subject: [PATCH 0028/1075] staging: gpib: Return -EINTR on device clear When the ATN (Attention) line is asserted during a read we get a NIUSB_ATN_STATE_ERROR during a read. For the controller to send a device clear it asserts ATN. Normally this is an error but in the case of a device clear it should be regarded as an interrupt. Return -EINTR when the Device Clear Active State (DCAS) is entered else signal an error with dev_dbg with status instead of just dev_err. Signed-off-by: Dave Penkler Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index ea44a766fda2..1f8412de9fa3 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -697,8 +697,12 @@ static int ni_usb_read(struct gpib_board *board, u8 *buffer, size_t length, */ break; case NIUSB_ATN_STATE_ERROR: - retval = -EIO; - dev_err(&usb_dev->dev, "read when ATN set\n"); + if (status.ibsta & DCAS) { + retval = -EINTR; + } else { + retval = -EIO; + dev_dbg(&usb_dev->dev, "read when ATN set stat: 0x%06x\n", status.ibsta); + } break; case NIUSB_ADDRESSING_ERROR: retval = -EIO; From b1aabb8ef09b4cf0cc0c92ca9dfd19482f3192c1 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 23 Sep 2025 09:36:03 +0800 Subject: [PATCH 0029/1075] staging: gpib: Fix device reference leak in fmh_gpib driver The fmh_gpib driver contains a device reference count leak in fmh_gpib_attach_impl() where driver_find_device() increases the reference count of the device by get_device() when matching but this reference is not properly decreased. Add put_device() in fmh_gpib_detach(), which ensures that the reference count of the device is correctly managed. Found by code review. Cc: stable Fixes: 8e4841a0888c ("staging: gpib: Add Frank Mori Hess FPGA PCI GPIB driver") Signed-off-by: Ma Ke Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index 164dcfc3c9ef..f7bfb4a8e553 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -1517,6 +1517,11 @@ void fmh_gpib_detach(struct gpib_board *board) resource_size(e_priv->gpib_iomem_res)); } fmh_gpib_generic_detach(board); + + if (board->dev) { + put_device(board->dev); + board->dev = NULL; + } } static int fmh_gpib_pci_attach_impl(struct gpib_board *board, From 7e69a24b6b35d4ffd54dd702047a01f5858b3e45 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 24 Sep 2025 15:05:10 +0200 Subject: [PATCH 0030/1075] rust_binder: clean `clippy::mem_replace_with_default` warning Clippy reports: error: replacing a value of type `T` with `T::default()` is better expressed using `core::mem::take` --> drivers/android/binder/node.rs:690:32 | 690 | _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider using: `core::mem::take(&mut inner.freeze_list)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#mem_replace_with_default = note: `-D clippy::mem-replace-with-default` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::mem_replace_with_default)]` The suggestion seems fine, thus apply it. Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/node.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder/node.rs b/drivers/android/binder/node.rs index ade895ef791e..08d362deaf61 100644 --- a/drivers/android/binder/node.rs +++ b/drivers/android/binder/node.rs @@ -687,7 +687,7 @@ pub(crate) fn remove_freeze_listener(&self, p: &Arc) { ); } if inner.freeze_list.is_empty() { - _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); + _unused_capacity = mem::take(&mut inner.freeze_list); } } From c7c090af371775106360c9e7a7c35b718311c3f9 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 2 Oct 2025 09:25:29 +0000 Subject: [PATCH 0031/1075] rust_binder: remove warning about orphan mappings This condition occurs if a thread dies while processing a transaction. We should not print anything in this scenario. Signed-off-by: Alice Ryhl Reviewed-by: Joel Fernandes Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/process.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index f13a747e784c..d8c3c1ae740e 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -1346,10 +1346,6 @@ fn deferred_release(self: Arc) { .alloc .take_for_each(|offset, size, debug_id, odata| { let ptr = offset + address; - pr_warn!( - "{}: removing orphan mapping {offset}:{size}\n", - self.pid_in_current_ns() - ); let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false); if let Some(data) = odata { From bfe144da06b002cccf314769c45ecccb69501c48 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:51 +0000 Subject: [PATCH 0032/1075] rust_binder: freeze_notif_done should resend if wrong state Consider the following scenario: 1. A freeze notification is delivered to thread 1. 2. The process becomes frozen or unfrozen. 3. The message for step 2 is delivered to thread 2 and ignored because there is already a pending notification from step 1. 4. Thread 1 acknowledges the notification from step 1. In this case, step 4 should ensure that the message ignored in step 3 is resent as it can now be delivered. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index e68c3c8bc55a..74bebb8d4d9b 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -245,8 +245,9 @@ pub(crate) fn freeze_notif_done(self: &Arc, reader: &mut UserSliceReader) ); return Err(EINVAL); } - if freeze.is_clearing { - // Immediately send another FreezeMessage for BR_CLEAR_FREEZE_NOTIFICATION_DONE. + let is_frozen = freeze.node.owner.inner.lock().is_frozen; + if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { + // Immediately send another FreezeMessage. clear_msg = Some(FreezeMessage::init(alloc, cookie)); } freeze.is_pending = false; From 99559e5bb4c6795824b6531ad61519c1d9500079 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:52 +0000 Subject: [PATCH 0033/1075] rust_binder: don't delete FreezeListener if there are pending duplicates When userspace issues commands to a freeze listener, it identifies it using a cookie. Normally this cookie uniquely identifies a freeze listener, but when userspace clears a listener with the intent of deleting it, it's allowed to "regret" clearing it and create a new freeze listener for the same node using the same cookie. (IMO this was an API mistake, but userspace relies on it.) Currently if the active freeze listener gets fully deleted while there are still pending duplicates, then the code incorrectly deletes the pending duplicates too. To fix this, do not delete the entry if there are still pending duplicates. Since the current data structure requires a main freeze listener, we convert one pending duplicate into the primary listener in this scenario. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index 74bebb8d4d9b..e304aceca7f3 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -106,7 +106,16 @@ fn do_work( return Ok(true); } if freeze.is_clearing { - _removed_listener = freeze_entry.remove_node(); + kernel::warn_on!(freeze.num_cleared_duplicates != 0); + if freeze.num_pending_duplicates > 0 { + // The primary freeze listener was deleted, so convert a pending duplicate back + // into the primary one. + freeze.num_pending_duplicates -= 1; + freeze.is_pending = true; + freeze.is_clearing = true; + } else { + _removed_listener = freeze_entry.remove_node(); + } drop(node_refs); writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?; writer.write_payload(&self.cookie.0)?; From b5ce7a5cc50f4c283d0bfa5cc24fe864cb9a3400 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:53 +0000 Subject: [PATCH 0034/1075] rust_binder: report freeze notification only when fully frozen Binder only sends out freeze notifications when ioctl_freeze() completes and the process has become fully frozen. However, if a freeze notification is registered during the freeze operation, then it registers an initial state of 'frozen'. This is a problem because if the freeze operation fails, then the listener is not told about that state change, leading to lost updates. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 4 +-- drivers/android/binder/process.rs | 46 +++++++++++++++++++++------ drivers/android/binder/transaction.rs | 6 ++-- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index e304aceca7f3..220de35ae85a 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -121,7 +121,7 @@ fn do_work( writer.write_payload(&self.cookie.0)?; Ok(true) } else { - let is_frozen = freeze.node.owner.inner.lock().is_frozen; + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); if freeze.last_is_frozen == Some(is_frozen) { return Ok(true); } @@ -254,7 +254,7 @@ pub(crate) fn freeze_notif_done(self: &Arc, reader: &mut UserSliceReader) ); return Err(EINVAL); } - let is_frozen = freeze.node.owner.inner.lock().is_frozen; + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { // Immediately send another FreezeMessage. clear_msg = Some(FreezeMessage::init(alloc, cookie)); diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index d8c3c1ae740e..7607353a5e92 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -72,6 +72,33 @@ fn new(address: usize, size: usize) -> Self { const PROC_DEFER_FLUSH: u8 = 1; const PROC_DEFER_RELEASE: u8 = 2; +#[derive(Copy, Clone)] +pub(crate) enum IsFrozen { + Yes, + No, + InProgress, +} + +impl IsFrozen { + /// Whether incoming transactions should be rejected due to freeze. + pub(crate) fn is_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => true, + } + } + + /// Whether freeze notifications consider this process frozen. + pub(crate) fn is_fully_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => false, + } + } +} + /// The fields of `Process` protected by the spinlock. pub(crate) struct ProcessInner { is_manager: bool, @@ -98,7 +125,7 @@ pub(crate) struct ProcessInner { /// are woken up. outstanding_txns: u32, /// Process is frozen and unable to service binder transactions. - pub(crate) is_frozen: bool, + pub(crate) is_frozen: IsFrozen, /// Process received sync transactions since last frozen. pub(crate) sync_recv: bool, /// Process received async transactions since last frozen. @@ -124,7 +151,7 @@ fn new() -> Self { started_thread_count: 0, defer_work: 0, outstanding_txns: 0, - is_frozen: false, + is_frozen: IsFrozen::No, sync_recv: false, async_recv: false, binderfs_file: None, @@ -1260,7 +1287,7 @@ fn deferred_release(self: Arc) { let is_manager = { let mut inner = self.inner.lock(); inner.is_dead = true; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; inner.sync_recv = false; inner.async_recv = false; inner.is_manager @@ -1367,7 +1394,7 @@ pub(crate) fn drop_outstanding_txn(&self) { return; } inner.outstanding_txns -= 1; - inner.is_frozen && inner.outstanding_txns == 0 + inner.is_frozen.is_frozen() && inner.outstanding_txns == 0 }; if wake { @@ -1381,7 +1408,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; drop(inner); msgs.send_messages(); return Ok(()); @@ -1390,7 +1417,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = true; + inner.is_frozen = IsFrozen::InProgress; if info.timeout_ms > 0 { let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms); @@ -1404,7 +1431,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { .wait_interruptible_timeout(&mut inner, jiffies) { CondVarTimeoutResult::Signal { .. } => { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; return Err(ERESTARTSYS); } CondVarTimeoutResult::Woken { jiffies: remaining } => { @@ -1418,17 +1445,18 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { } if inner.txns_pending_locked() { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; Err(EAGAIN) } else { drop(inner); match self.prepare_freeze_messages() { Ok(batch) => { + self.inner.lock().is_frozen = IsFrozen::Yes; batch.send_messages(); Ok(()) } Err(kernel::alloc::AllocError) => { - self.inner.lock().is_frozen = false; + self.inner.lock().is_frozen = IsFrozen::No; Err(ENOMEM) } } diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs index 02512175d622..4bd3c0e417eb 100644 --- a/drivers/android/binder/transaction.rs +++ b/drivers/android/binder/transaction.rs @@ -249,7 +249,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { if oneway { if let Some(target_node) = self.target_node.clone() { - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.async_recv = true; if self.flags & TF_UPDATE_TXN != 0 { if let Some(t_outdated) = @@ -270,7 +270,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { return Err(BinderError::new_frozen_oneway()); } else { return Ok(()); @@ -280,7 +280,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.sync_recv = true; return Err(BinderError::new_frozen()); } From 7557f189942571821a09879edfcdfdafefe4d67f Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Fri, 3 Oct 2025 18:08:49 +0000 Subject: [PATCH 0035/1075] binder: Fix missing kernel-doc entries in binder.c Fix several kernel-doc warnings in `drivers/android/binder.c` caused by undocumented struct members and function parameters. In particular, add missing documentation for the `@thread` parameter in binder_free_buf_locked(). Signed-off-by: Kriish Sharma Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8c99ceaa303b..3a09c54bc37b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2418,10 +2418,10 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, /** * struct binder_ptr_fixup - data to be fixed-up in target buffer - * @offset offset in target buffer to fixup - * @skip_size bytes to skip in copy (fixup will be written later) - * @fixup_data data to write at fixup offset - * @node list node + * @offset: offset in target buffer to fixup + * @skip_size: bytes to skip in copy (fixup will be written later) + * @fixup_data: data to write at fixup offset + * @node: list node * * This is used for the pointer fixup list (pf) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -2438,10 +2438,10 @@ struct binder_ptr_fixup { /** * struct binder_sg_copy - scatter-gather data to be copied - * @offset offset in target buffer - * @sender_uaddr user address in source buffer - * @length bytes to copy - * @node list node + * @offset: offset in target buffer + * @sender_uaddr: user address in source buffer + * @length: bytes to copy + * @node: list node * * This is used for the sg copy list (sgc) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -4063,14 +4063,15 @@ binder_freeze_notification_done(struct binder_proc *proc, /** * binder_free_buf() - free the specified buffer - * @proc: binder proc that owns buffer - * @buffer: buffer to be freed - * @is_failure: failed to send transaction + * @proc: binder proc that owns buffer + * @thread: binder thread performing the buffer release + * @buffer: buffer to be freed + * @is_failure: failed to send transaction * - * If buffer for an async transaction, enqueue the next async + * If the buffer is for an async transaction, enqueue the next async * transaction from the node. * - * Cleanup buffer and free it. + * Cleanup the buffer and free it. */ static void binder_free_buf(struct binder_proc *proc, From 11fb1a82aefa6f7fea6ac82334edb5639b9927df Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 23 Sep 2025 16:09:27 +0100 Subject: [PATCH 0036/1075] firmware: arm_ffa: Add support for IMPDEF value in the memory access descriptor FF-A v1.2 introduced 16 byte IMPLEMENTATION DEFINED value in the endpoint memory access descriptor to allow any sender could to specify an its any custom value for each receiver. Also this value must be specified by the receiver when retrieving the memory region. The sender must ensure it informs the receiver of this value via an IMPLEMENTATION DEFINED mechanism such as a partition message. So the FF-A driver can use the message interfaces to communicate the value and set the same in the ffa_mem_region_attributes structures when using the memory interfaces. The driver ensure that the size of the endpoint memory access descriptors is set correctly based on the FF-A version. Fixes: 9fac08d9d985 ("firmware: arm_ffa: Upgrade FF-A version to v1.2 in the driver") Reported-by: Lixiang Mao Tested-by: Lixiang Mao Message-Id: <20250923150927.1218364-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 37 ++++++++++++++++++++++--------- include/linux/arm_ffa.h | 21 ++++++++++++++++-- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 65bf1685350a..c72ee4756585 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -649,6 +649,26 @@ static u16 ffa_memory_attributes_get(u32 func_id) return FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | FFA_MEM_INNER_SHAREABLE; } +static void ffa_emad_impdef_value_init(u32 version, void *dst, void *src) +{ + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(version)) + memcpy(dst, src, sizeof(ep_mem_access->impdef_val)); +} + +static void +ffa_mem_region_additional_setup(u32 version, struct ffa_mem_region *mem_region) +{ + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version)) { + mem_region->ep_mem_size = 0; + } else { + mem_region->ep_mem_size = ffa_emad_size_get(version); + mem_region->ep_mem_offset = sizeof(*mem_region); + memset(mem_region->reserved, 0, 12); + } +} + static int ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, struct ffa_mem_ops_args *args) @@ -667,27 +687,24 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, mem_region->flags = args->flags; mem_region->sender_id = drv_info->vm_id; mem_region->attributes = ffa_memory_attributes_get(func_id); - ep_mem_access = buffer + - ffa_mem_desc_offset(buffer, 0, drv_info->version); composite_offset = ffa_mem_desc_offset(buffer, args->nattrs, drv_info->version); - for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { + for (idx = 0; idx < args->nattrs; idx++) { + ep_mem_access = buffer + + ffa_mem_desc_offset(buffer, idx, drv_info->version); ep_mem_access->receiver = args->attrs[idx].receiver; ep_mem_access->attrs = args->attrs[idx].attrs; ep_mem_access->composite_off = composite_offset; ep_mem_access->flag = 0; ep_mem_access->reserved = 0; + ffa_emad_impdef_value_init(drv_info->version, + ep_mem_access->impdef_val, + args->attrs[idx].impdef_val); } mem_region->handle = 0; mem_region->ep_count = args->nattrs; - if (drv_info->version <= FFA_VERSION_1_0) { - mem_region->ep_mem_size = 0; - } else { - mem_region->ep_mem_size = sizeof(*ep_mem_access); - mem_region->ep_mem_offset = sizeof(*mem_region); - memset(mem_region->reserved, 0, 12); - } + ffa_mem_region_additional_setup(drv_info->version, mem_region); composite = buffer + composite_offset; composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cd7ee4df9045..81e603839c4a 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -338,6 +338,7 @@ struct ffa_mem_region_attributes { * an `struct ffa_mem_region_addr_range`. */ u32 composite_off; + u8 impdef_val[16]; u64 reserved; }; @@ -417,15 +418,31 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2) +#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0) + +static inline u32 ffa_emad_size_get(u32 ffa_version) +{ + u32 sz; + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version)) + sz = sizeof(*ep_mem_access); + else + sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val); + + return sz; +} + static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - u32 offset = count * sizeof(struct ffa_mem_region_attributes); + u32 offset = count * ffa_emad_size_get(ffa_version); /* * Earlier to v1.1, the endpoint memory descriptor array started at * offset 32(i.e. offset of ep_mem_offset in the current structure) */ - if (ffa_version <= FFA_VERSION_1_0) + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version)) offset += offsetof(struct ffa_mem_region, ep_mem_offset); else offset += sizeof(struct ffa_mem_region); From a89103f67112453fa36c9513e951c19eed9d2d92 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Sep 2025 16:47:13 +0800 Subject: [PATCH 0037/1075] spi: spi-nxp-fspi: re-config the clock rate when operation require new clock rate Current operation contain the max_freq, so new coming operation may use new clock rate, need to re-config the clock rate to match the requirement. Fixes: 26851cf65ffc ("spi: nxp-fspi: Support per spi-mem operation frequency switches") Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-1-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index f9371f98a65b..4e82f9e900ac 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -404,6 +404,8 @@ struct nxp_fspi { #define FSPI_NEED_INIT BIT(0) #define FSPI_DTR_MODE BIT(1) int flags; + /* save the previous operation clock rate */ + unsigned long pre_op_rate; }; static inline int needs_ip_only(struct nxp_fspi *f) @@ -780,11 +782,17 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, uint64_t size_kb; /* - * Return, if previously selected target device is same as current - * requested target device. Also the DTR or STR mode do not change. + * Return when following condition all meet, + * 1, if previously selected target device is same as current + * requested target device. + * 2, the DTR or STR mode do not change. + * 3, previous operation max rate equals current one. + * + * For other case, need to re-config. */ if ((f->selected == spi_get_chipselect(spi, 0)) && - (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr)) + (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr) && + (f->pre_op_rate == op->max_freq)) return; /* Reset FLSHxxCR0 registers */ @@ -832,6 +840,8 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, else nxp_fspi_dll_override(f); + f->pre_op_rate = op->max_freq; + f->selected = spi_get_chipselect(spi, 0); } From b93b4269791fdebbac2a9ad26f324dc2abb9e60f Mon Sep 17 00:00:00 2001 From: Han Xu Date: Mon, 22 Sep 2025 16:47:14 +0800 Subject: [PATCH 0038/1075] spi: spi-nxp-fspi: add extra delay after dll locked Due to the erratum ERR050272, the DLL lock status register STS2 [xREFLOCK, xSLVLOCK] bit may indicate DLL is locked before DLL is actually locked. Add an extra 4us delay as a workaround. refer to ERR050272, on Page 20. https://www.nxp.com/docs/en/errata/IMX8_1N94W.pdf Fixes: 99d822b3adc4 ("spi: spi-nxp-fspi: use DLL calibration when clock rate > 100MHz") Signed-off-by: Han Xu Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-2-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 4e82f9e900ac..96b3654b45ab 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -721,6 +721,12 @@ static void nxp_fspi_dll_calibration(struct nxp_fspi *f) 0, POLL_TOUT, true); if (ret) dev_warn(f->dev, "DLL lock failed, please fix it!\n"); + + /* + * For ERR050272, DLL lock status bit is not accurate, + * wait for 4us more as a workaround. + */ + udelay(4); } /* From f43579ef3500527649b1c233be7cf633806353aa Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Sep 2025 16:47:15 +0800 Subject: [PATCH 0039/1075] spi: spi-nxp-fspi: limit the clock rate for different sample clock source selection For different sample clock source selection, the max frequency flexspi supported are different. For mode 0, max frequency is 66MHz. For mode 3, the max frequency is 166MHz. Refer to 3.9.9 FlexSPI timing parameters on page 65. https://www.nxp.com/docs/en/data-sheet/IMX8MNCEC.pdf Though flexspi maybe still work under higher frequency, but can't guarantee the stability. IC suggest to add this limitation on all SoCs which contain flexspi. Fixes: c07f27032317 ("spi: spi-nxp-fspi: add the support for sample data from DQS pad") Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-3-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 96b3654b45ab..b6c79e50d842 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -406,6 +406,8 @@ struct nxp_fspi { int flags; /* save the previous operation clock rate */ unsigned long pre_op_rate; + /* the max clock rate fspi output to device */ + unsigned long max_rate; }; static inline int needs_ip_only(struct nxp_fspi *f) @@ -687,10 +689,13 @@ static void nxp_fspi_select_rx_sample_clk_source(struct nxp_fspi *f, * change the mode back to mode 0. */ reg = fspi_readl(f, f->iobase + FSPI_MCR0); - if (op_is_dtr) + if (op_is_dtr) { reg |= FSPI_MCR0_RXCLKSRC(3); - else /*select mode 0 */ + f->max_rate = 166000000; + } else { /*select mode 0 */ reg &= ~FSPI_MCR0_RXCLKSRC(3); + f->max_rate = 66000000; + } fspi_writel(f, reg, f->iobase + FSPI_MCR0); } @@ -816,6 +821,7 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, dev_dbg(f->dev, "Target device [CS:%x] selected\n", spi_get_chipselect(spi, 0)); nxp_fspi_select_rx_sample_clk_source(f, op_is_dtr); + rate = min(f->max_rate, op->max_freq); if (op_is_dtr) { f->flags |= FSPI_DTR_MODE; From 8735696acea24ac1f9d4490992418c71941ca68c Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Thu, 9 Oct 2025 09:10:38 +0200 Subject: [PATCH 0040/1075] spi: cadence-quadspi: Fix pm_runtime unbalance on dma EPROBE_DEFER In csqspi_probe(), when cqspi_request_mmap_dma() returns -EPROBE_DEFER, we handle the error by jumping to probe_setup_failed. In that label, we call pm_runtime_disable(), even if we never called pm_runtime_enable() before. Because of this, the driver cannot probe: [ 2.690018] cadence-qspi 47040000.spi: No Rx DMA available [ 2.699735] spi-nor spi0.0: resume failed with -13 [ 2.699741] spi-nor: probe of spi0.0 failed with error -13 Only call pm_runtime_disable() if it was enabled by adding a new label to handle cqspi_request_mmap_dma() failures. Fixes: b07f349d1864 ("spi: spi-cadence-quadspi: Fix pm runtime unbalance") Signed-off-by: Mattijs Korpershoek Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20251009-cadence-quadspi-fix-pm-runtime-v2-1-8bdfefc43902@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 8fb13df8ff87..81017402bc56 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1995,7 +1995,7 @@ static int cqspi_probe(struct platform_device *pdev) if (cqspi->use_direct_mode) { ret = cqspi_request_mmap_dma(cqspi); if (ret == -EPROBE_DEFER) - goto probe_setup_failed; + goto probe_dma_failed; } if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { @@ -2019,9 +2019,10 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: - cqspi_controller_enable(cqspi, 0); if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) pm_runtime_disable(dev); +probe_dma_failed: + cqspi_controller_enable(cqspi, 0); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); From 79c0a2b7abc906c7cf3c793256c6b638d7dc477f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 27 Sep 2025 15:26:34 +0300 Subject: [PATCH 0041/1075] EDAC/versalnet: Fix off by one in handle_error() The priv->mci[] array has NUM_CONTROLLERS so this > comparison needs to be >= to prevent an out of bounds access. Fixes: d5fe2fec6c40 ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Dan Carpenter Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Yazen Ghannam --- drivers/edac/versalnet_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 7c5db8bf0595..1ded4c3f0213 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -433,7 +433,7 @@ static void handle_error(struct mc_priv *priv, struct ecc_status *stat, phys_addr_t pfn; int err; - if (WARN_ON_ONCE(ctl_num > NUM_CONTROLLERS)) + if (WARN_ON_ONCE(ctl_num >= NUM_CONTROLLERS)) return; mci = priv->mci[ctl_num]; From aa960b597600bed80fe171729057dd6aa188b5b5 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 24 Sep 2025 09:56:05 +0100 Subject: [PATCH 0042/1075] arm64: dts: broadcom: bcm2712: Define VGIC interrupt Define the interrupt in the GICv2 for vGIC so KVM can be used, it was missed from the original upstream DTB for some reason. Signed-off-by: Peter Robinson Cc: Andrea della Porta Cc: Phil Elwell Fixes: faa3381267d0 ("arm64: dts: broadcom: Add minimal support for Raspberry Pi 5") Link: https://lore.kernel.org/r/20250924085612.1039247-1-pbrobinson@gmail.com Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/bcm2712.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index e77a66adc22a..205b87f557d6 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -326,6 +326,8 @@ gicv2: interrupt-controller@7fff9000 { <0x7fffe000 0x2000>; interrupt-controller; #address-cells = <0>; + interrupts = ; #interrupt-cells = <3>; }; From 4adc20ba95d472a919f54d441663924e33c92279 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 5 Oct 2025 13:38:16 +0200 Subject: [PATCH 0043/1075] ARM: dts: broadcom: rpi: Switch to V3D firmware clock Until commit 919d6924ae9b ("clk: bcm: rpi: Turn firmware clock on/off when preparing/unpreparing") the clk-raspberrypi driver wasn't able to change the state of the V3D clock. Only the clk-bcm2835 was able to do this before. After this commit both drivers were able to work against each other, which could result in a system freeze. One step to avoid this conflict is to switch all V3D consumer to the firmware clock. Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-arm-kernel/727aa0c8-2981-4662-adf3-69cac2da956d@samsung.com/ Fixes: 919d6924ae9b ("clk: bcm: rpi: Turn firmware clock on/off when preparing/unpreparing") Signed-off-by: Stefan Wahren Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251005113816.6721-1-wahrenst@gmx.net Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi | 8 ++++++++ arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi index c78ed064d166..1eb6406449d1 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi @@ -77,6 +77,14 @@ &i2c0 { /delete-property/ pinctrl-0; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &rmem { /* * RPi4's co-processor will copy the board's bootloader configuration diff --git a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi index 8b3c21d9f333..fa9d784c88b6 100644 --- a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi @@ -13,7 +13,16 @@ &hdmi { clock-names = "pixel", "hdmi"; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &v3d { + clocks = <&firmware_clocks 5>; power-domains = <&power RPI_POWER_DOMAIN_V3D>; }; From 54e96258a6930909b690fd7e8889749231ba8085 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Oct 2025 15:35:36 -1000 Subject: [PATCH 0044/1075] sched_ext: Mark scx_bpf_dsq_move_set_[slice|vtime]() with KF_RCU scx_bpf_dsq_move_set_slice() and scx_bpf_dsq_move_set_vtime() take a DSQ iterator argument which has to be valid. Mark them with KF_RCU. Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2b0e88206d07..fc353b8d69f7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5688,8 +5688,8 @@ BTF_KFUNCS_START(scx_kfunc_ids_dispatch) BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots) BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel) BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_dispatch) @@ -5820,8 +5820,8 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_unlocked) BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_unlocked) From efeeaac9ae9763f9c953e69633c86bc3031e39b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Oct 2025 13:56:23 -1000 Subject: [PATCH 0045/1075] sched_ext: Sync error_irq_work before freeing scx_sched By the time scx_sched_free_rcu_work() runs, the scx_sched is no longer reachable. However, a previously queued error_irq_work may still be pending or running. Ensure it completes before proceeding with teardown. Fixes: bff3b5aec1b7 ("sched_ext: Move disable machinery into scx_sched") Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index fc353b8d69f7..a79dfd0f743a 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3471,7 +3471,9 @@ static void scx_sched_free_rcu_work(struct work_struct *work) struct scx_dispatch_q *dsq; int node; + irq_work_sync(&sch->error_irq_work); kthread_stop(sch->helper->task); + free_percpu(sch->pcpu); for_each_node_state(node, N_POSSIBLE) From a8ad873113d3fe01f9b5d737d4b0570fa36826b0 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Fri, 10 Oct 2025 12:12:50 -0700 Subject: [PATCH 0046/1075] sched_ext: defer queue_balance_callback() until after ops.dispatch The sched_ext code calls queue_balance_callback() during enqueue_task() to defer operations that drop multiple locks until we can unpin them. The call assumes that the rq lock is held until the callbacks are invoked, and the pending callbacks will not be visible to any other threads. This is enforced by a WARN_ON_ONCE() in rq_pin_lock(). However, balance_one() may actually drop the lock during a BPF dispatch call. Another thread may win the race to get the rq lock and see the pending callback. To avoid this, sched_ext must only queue the callback after the dispatch calls have completed. CPU 0 CPU 1 CPU 2 scx_balance() rq_unpin_lock() scx_balance_one() |= IN_BALANCE scx_enqueue() ops.dispatch() rq_unlock() rq_lock() queue_balance_callback() rq_unlock() [WARN] rq_pin_lock() rq_lock() &= ~IN_BALANCE rq_repin_lock() Changelog v2-> v1 (https://lore.kernel.org/sched-ext/aOgOxtHCeyRT_7jn@gpd4) - Fixed explanation in patch description (Andrea) - Fixed scx_rq mask state updates (Andrea) - Added Reviewed-by tag from Andrea Reported-by: Jakub Kicinski Signed-off-by: Emil Tsalapatis (Meta) Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 29 +++++++++++++++++++++++++++-- kernel/sched/sched.h | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a79dfd0f743a..1352e6a5b089 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -780,13 +780,23 @@ static void schedule_deferred(struct rq *rq) if (rq->scx.flags & SCX_RQ_IN_WAKEUP) return; + /* Don't do anything if there already is a deferred operation. */ + if (rq->scx.flags & SCX_RQ_BAL_PENDING) + return; + /* * If in balance, the balance callbacks will be called before rq lock is * released. Schedule one. + * + * + * We can't directly insert the callback into the + * rq's list: The call can drop its lock and make the pending balance + * callback visible to unrelated code paths that call rq_pin_lock(). + * + * Just let balance_one() know that it must do it itself. */ if (rq->scx.flags & SCX_RQ_IN_BALANCE) { - queue_balance_callback(rq, &rq->scx.deferred_bal_cb, - deferred_bal_cb_workfn); + rq->scx.flags |= SCX_RQ_BAL_CB_PENDING; return; } @@ -2003,6 +2013,19 @@ static void flush_dispatch_buf(struct scx_sched *sch, struct rq *rq) dspc->cursor = 0; } +static inline void maybe_queue_balance_callback(struct rq *rq) +{ + lockdep_assert_rq_held(rq); + + if (!(rq->scx.flags & SCX_RQ_BAL_CB_PENDING)) + return; + + queue_balance_callback(rq, &rq->scx.deferred_bal_cb, + deferred_bal_cb_workfn); + + rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING; +} + static int balance_one(struct rq *rq, struct task_struct *prev) { struct scx_sched *sch = scx_root; @@ -2150,6 +2173,8 @@ static int balance_scx(struct rq *rq, struct task_struct *prev, #endif rq_repin_lock(rq, rf); + maybe_queue_balance_callback(rq); + return ret; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5d07067f60..3f7fab3d7960 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -784,6 +784,7 @@ enum scx_rq_flags { SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */ SCX_RQ_BYPASSING = 1 << 4, SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */ + SCX_RQ_BAL_CB_PENDING = 1 << 6, /* must queue a cb after dispatching */ SCX_RQ_IN_WAKEUP = 1 << 16, SCX_RQ_IN_BALANCE = 1 << 17, From 14c1da3895a116f4e32c20487046655f26d3999b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Oct 2025 13:43:26 -1000 Subject: [PATCH 0047/1075] sched_ext: Allocate scx_kick_cpus_pnt_seqs lazily using kvzalloc() On systems with >4096 CPUs, scx_kick_cpus_pnt_seqs allocation fails during boot because it exceeds the 32,768 byte percpu allocator limit. Restructure to use DEFINE_PER_CPU() for the per-CPU pointers, with each CPU pointing to its own kvzalloc'd array. Move allocation from boot time to scx_enable() and free in scx_disable(), so the O(nr_cpu_ids^2) memory is only consumed when sched_ext is active. Use RCU to guard against racing with free. Arrays are freed via call_rcu() and kick_cpus_irq_workfn() uses rcu_dereference_bh() with a NULL check. While at it, rename to scx_kick_pseqs for brevity and update comments to clarify these are pick_task sequence numbers. v2: RCU protect scx_kick_seqs to manage kick_cpus_irq_workfn() racing against disable as per Andrea. v3: Fix bugs notcied by Andrea. Reported-by: Phil Auld Link: http://lkml.kernel.org/r/20251007133523.GA93086@pauld.westford.csb Cc: Andrea Righi Reviewed-by: Emil Tsalapatis Reviewed-by: Phil Auld Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 89 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1352e6a5b089..c645d47124e7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -67,8 +67,19 @@ static unsigned long scx_watchdog_timestamp = INITIAL_JIFFIES; static struct delayed_work scx_watchdog_work; -/* for %SCX_KICK_WAIT */ -static unsigned long __percpu *scx_kick_cpus_pnt_seqs; +/* + * For %SCX_KICK_WAIT: Each CPU has a pointer to an array of pick_task sequence + * numbers. The arrays are allocated with kvzalloc() as size can exceed percpu + * allocator limits on large machines. O(nr_cpu_ids^2) allocation, allocated + * lazily when enabling and freed when disabling to avoid waste when sched_ext + * isn't active. + */ +struct scx_kick_pseqs { + struct rcu_head rcu; + unsigned long seqs[]; +}; + +static DEFINE_PER_CPU(struct scx_kick_pseqs __rcu *, scx_kick_pseqs); /* * Direct dispatch marker. @@ -3877,6 +3888,27 @@ static const char *scx_exit_reason(enum scx_exit_kind kind) } } +static void free_kick_pseqs_rcu(struct rcu_head *rcu) +{ + struct scx_kick_pseqs *pseqs = container_of(rcu, struct scx_kick_pseqs, rcu); + + kvfree(pseqs); +} + +static void free_kick_pseqs(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *to_free; + + to_free = rcu_replace_pointer(*pseqs, NULL, true); + if (to_free) + call_rcu(&to_free->rcu, free_kick_pseqs_rcu); + } +} + static void scx_disable_workfn(struct kthread_work *work) { struct scx_sched *sch = container_of(work, struct scx_sched, disable_work); @@ -4013,6 +4045,7 @@ static void scx_disable_workfn(struct kthread_work *work) free_percpu(scx_dsp_ctx); scx_dsp_ctx = NULL; scx_dsp_max_batch = 0; + free_kick_pseqs(); mutex_unlock(&scx_enable_mutex); @@ -4375,6 +4408,33 @@ static void scx_vexit(struct scx_sched *sch, irq_work_queue(&sch->error_irq_work); } +static int alloc_kick_pseqs(void) +{ + int cpu; + + /* + * Allocate per-CPU arrays sized by nr_cpu_ids. Use kvzalloc as size + * can exceed percpu allocator limits on large machines. + */ + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *new_pseqs; + + WARN_ON_ONCE(rcu_access_pointer(*pseqs)); + + new_pseqs = kvzalloc_node(struct_size(new_pseqs, seqs, nr_cpu_ids), + GFP_KERNEL, cpu_to_node(cpu)); + if (!new_pseqs) { + free_kick_pseqs(); + return -ENOMEM; + } + + rcu_assign_pointer(*pseqs, new_pseqs); + } + + return 0; +} + static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops) { struct scx_sched *sch; @@ -4517,15 +4577,19 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) mutex_lock(&scx_enable_mutex); + ret = alloc_kick_pseqs(); + if (ret) + goto err_unlock; + if (scx_enable_state() != SCX_DISABLED) { ret = -EBUSY; - goto err_unlock; + goto err_free_pseqs; } sch = scx_alloc_and_add_sched(ops); if (IS_ERR(sch)) { ret = PTR_ERR(sch); - goto err_unlock; + goto err_free_pseqs; } /* @@ -4728,6 +4792,8 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) return 0; +err_free_pseqs: + free_kick_pseqs(); err_unlock: mutex_unlock(&scx_enable_mutex); return ret; @@ -5109,10 +5175,18 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work) { struct rq *this_rq = this_rq(); struct scx_rq *this_scx = &this_rq->scx; - unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs); + struct scx_kick_pseqs __rcu *pseqs_pcpu = __this_cpu_read(scx_kick_pseqs); bool should_wait = false; + unsigned long *pseqs; s32 cpu; + if (unlikely(!pseqs_pcpu)) { + pr_warn_once("kick_cpus_irq_workfn() called with NULL scx_kick_pseqs"); + return; + } + + pseqs = rcu_dereference_bh(pseqs_pcpu)->seqs; + for_each_cpu(cpu, this_scx->cpus_to_kick) { should_wait |= kick_one_cpu(cpu, this_rq, pseqs); cpumask_clear_cpu(cpu, this_scx->cpus_to_kick); @@ -5235,11 +5309,6 @@ void __init init_sched_ext_class(void) scx_idle_init_masks(); - scx_kick_cpus_pnt_seqs = - __alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids, - __alignof__(scx_kick_cpus_pnt_seqs[0])); - BUG_ON(!scx_kick_cpus_pnt_seqs); - for_each_possible_cpu(cpu) { struct rq *rq = cpu_rq(cpu); int n = cpu_to_node(cpu); From 02e7567f5da023524476053a38c54f4f19130959 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 1 Oct 2025 14:03:37 +0800 Subject: [PATCH 0048/1075] cxl/port: Avoid missing port component registers setup port->nr_dports is used to represent how many dports added to the cxl port, it will increase in add_dport() when a new dport is being added to the cxl port, but it will not be reduced when a dport is removed from the cxl port. Currently, when the first dport is added to a cxl port, it will trigger component registers setup on the cxl port, the implementation is using port->nr_dports to confirm if the dport is the first dport. A corner case here is that adding dport could fail after port->nr_dports updating and before checking port->nr_dports for component registers setup. If the failure happens during the first dport attaching, it will cause that CXL subsystem has not chance to execute component registers setup for the cxl port. the failure flow like below: port->nr_dports = 0 dport 1 adding to the port: add_dport() # port->nr_dports: 1 failed on devm_add_action_or_reset() or sysfs_create_link() return error # port->nr_dports: 1 dport 2 adding to the port: add_dport() # port->nr_dports: 2 no failure skip component registers setup because of port->nr_dports is 2 The solution here is that moving component registers setup closer to add_dport(), so if add_dport() is executed correctly for the first dport, component registers setup on the port will be executed immediately after that. Fixes: f6ee24913de2 ("cxl: Move port register setup to when first dport appear") Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang --- drivers/cxl/core/port.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index d5f71eb1ade8..8128fd2b5b31 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1182,6 +1182,20 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. + */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1200,18 +1214,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); - /* - * Setup port register if this is the first dport showed up. Having - * a dport also means that there is at least 1 active link. - */ - if (port->nr_dports == 1 && - port->component_reg_phys != CXL_RESOURCE_NONE) { - rc = cxl_port_setup_regs(port, port->component_reg_phys); - if (rc) - return ERR_PTR(rc); - port->component_reg_phys = CXL_RESOURCE_NONE; - } - return dport; } From 15292f1b4c55a3a7c940dbcb6cb8793871ed3d92 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Fri, 10 Oct 2025 12:08:35 -0500 Subject: [PATCH 0049/1075] x86/resctrl: Fix miscount of bandwidth event when reactivating previously unavailable RMID Users can create as many monitoring groups as the number of RMIDs supported by the hardware. However, on AMD systems, only a limited number of RMIDs are guaranteed to be actively tracked by the hardware. RMIDs that exceed this limit are placed in an "Unavailable" state. When a bandwidth counter is read for such an RMID, the hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62). When such an RMID starts being tracked again the hardware counter is reset to zero. MSR_IA32_QM_CTR.Unavailable remains set on first read after tracking re-starts and is clear on all subsequent reads as long as the RMID is tracked. resctrl miscounts the bandwidth events after an RMID transitions from the "Unavailable" state back to being tracked. This happens because when the hardware starts counting again after resetting the counter to zero, resctrl in turn compares the new count against the counter value stored from the previous time the RMID was tracked. This results in resctrl computing an event value that is either undercounting (when new counter is more than stored counter) or a mistaken overflow (when new counter is less than stored counter). Reset the stored value (arch_mbm_state::prev_msr) of MSR_IA32_QM_CTR to zero whenever the RMID is in the "Unavailable" state to ensure accurate counting after the RMID resets to zero when it starts to be tracked again. Example scenario that results in mistaken overflow ================================================== 1. The resctrl filesystem is mounted, and a task is assigned to a monitoring group. $mount -t resctrl resctrl /sys/fs/resctrl $mkdir /sys/fs/resctrl/mon_groups/test1/ $echo 1234 > /sys/fs/resctrl/mon_groups/test1/tasks $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 21323 <- Total bytes on domain 0 "Unavailable" <- Total bytes on domain 1 Task is running on domain 0. Counter on domain 1 is "Unavailable". 2. The task runs on domain 0 for a while and then moves to domain 1. The counter starts incrementing on domain 1. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 7345357 <- Total bytes on domain 0 4545 <- Total bytes on domain 1 3. At some point, the RMID in domain 0 transitions to the "Unavailable" state because the task is no longer executing in that domain. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes "Unavailable" <- Total bytes on domain 0 434341 <- Total bytes on domain 1 4. Since the task continues to migrate between domains, it may eventually return to domain 0. $cat /sys/fs/resctrl/mon_groups/test1/mon_data/mon_L3_*/mbm_total_bytes 17592178699059 <- Overflow on domain 0 3232332 <- Total bytes on domain 1 In this case, the RMID on domain 0 transitions from "Unavailable" state to active state. The hardware sets MSR_IA32_QM_CTR.Unavailable (bit 62) when the counter is read and begins tracking the RMID counting from 0. Subsequent reads succeed but return a value smaller than the previously saved MSR value (7345357). Consequently, the resctrl's overflow logic is triggered, it compares the previous value (7345357) with the new, smaller value and incorrectly interprets this as a counter overflow, adding a large delta. In reality, this is a false positive: the counter did not overflow but was simply reset when the RMID transitioned from "Unavailable" back to active state. Here is the text from APM [1] available from [2]. "In PQOS Version 2.0 or higher, the MBM hardware will set the U bit on the first QM_CTR read when it begins tracking an RMID that it was not previously tracking. The U bit will be zero for all subsequent reads from that RMID while it is still tracked by the hardware. Therefore, a QM_CTR read with the U bit set when that RMID is in use by a processor can be considered 0 when calculating the difference with a subsequent read." [1] AMD64 Architecture Programmer's Manual Volume 2: System Programming Publication # 24593 Revision 3.41 section 19.3.3 Monitoring L3 Memory Bandwidth (MBM). [ bp: Split commit message into smaller paragraph chunks for better consumption. ] Fixes: 4d05bf71f157d ("x86/resctrl: Introduce AMD QOS feature") Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Reinette Chatre Cc: stable@vger.kernel.org # needs adjustments for <= v6.17 Link: https://bugzilla.kernel.org/show_bug.cgi?id=206537 # [2] --- arch/x86/kernel/cpu/resctrl/monitor.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index c8945610d455..2cd25a0d4637 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -242,7 +242,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, u32 unused, u32 rmid, enum resctrl_event_id eventid, u64 *val, void *ignored) { + struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); int cpu = cpumask_any(&d->hdr.cpu_mask); + struct arch_mbm_state *am; u64 msr_val; u32 prmid; int ret; @@ -251,12 +253,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d, prmid = logical_rmid_to_physical_rmid(cpu, rmid); ret = __rmid_read_phys(prmid, eventid, &msr_val); - if (ret) - return ret; - *val = get_corrected_val(r, d, rmid, eventid, msr_val); + if (!ret) { + *val = get_corrected_val(r, d, rmid, eventid, msr_val); + } else if (ret == -EINVAL) { + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + am->prev_msr = 0; + } - return 0; + return ret; } static int __cntr_id_read(u32 cntr_id, u64 *val) From a375246fcf2bbdaeb1df7fa7ee5a8b884a89085e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 9 Oct 2025 08:40:01 -0700 Subject: [PATCH 0050/1075] cxl/features: Add check for no entries in cxl_feature_info cxl EDAC calls cxl_feature_info() to get the feature information and if the hardware has no Features support, cxlfs may be passed in as NULL. [ 51.957498] BUG: kernel NULL pointer dereference, address: 0000000000000008 [ 51.965571] #PF: supervisor read access in kernel mode [ 51.971559] #PF: error_code(0x0000) - not-present page [ 51.977542] PGD 17e4f6067 P4D 0 [ 51.981384] Oops: Oops: 0000 [#1] SMP NOPTI [ 51.986300] CPU: 49 UID: 0 PID: 3782 Comm: systemd-udevd Not tainted 6.17.0dj test+ #64 PREEMPT(voluntary) [ 51.997355] Hardware name: [ 52.009790] RIP: 0010:cxl_feature_info+0xa/0x80 [cxl_core] Add a check for cxlfs before dereferencing it and return -EOPNOTSUPP if there is no cxlfs created due to no hardware support. Fixes: eb5dfcb9e36d ("cxl: Add support to handle user feature commands for set feature") Reviewed-by: Davidlohr Bueso Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/features.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/core/features.c b/drivers/cxl/core/features.c index 7c750599ea69..4bc484b46f43 100644 --- a/drivers/cxl/core/features.c +++ b/drivers/cxl/core/features.c @@ -371,6 +371,9 @@ cxl_feature_info(struct cxl_features_state *cxlfs, { struct cxl_feat_entry *feat; + if (!cxlfs || !cxlfs->entries) + return ERR_PTR(-EOPNOTSUPP); + for (int i = 0; i < cxlfs->entries->num_features; i++) { feat = &cxlfs->entries->ent[i]; if (uuid_equal(uuid, &feat->uuid)) From f25785f9b088ed65089dd0d0034da52858417839 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sun, 5 Oct 2025 23:48:05 -0400 Subject: [PATCH 0051/1075] x86/mm: Fix overflow in __cpa_addr() The change to have cpa_flush() call flush_kernel_pages() introduced a bug where __cpa_addr() can access an address one larger than the largest one in the cpa->pages array. KASAN reports the issue like this: BUG: KASAN: slab-out-of-bounds in __cpa_addr arch/x86/mm/pat/set_memory.c:309 [inline] BUG: KASAN: slab-out-of-bounds in __cpa_addr+0x1d3/0x220 arch/x86/mm/pat/set_memory.c:306 Read of size 8 at addr ffff88801f75e8f8 by task syz.0.17/5978 This bug could cause cpa_flush() to not properly flush memory, which somehow never showed any symptoms in my tests, possibly because cpa_flush() is called so rarely, but could potentially cause issues for other people. Fix the issue by directly calculating the flush end address from the start address. Fixes: 86e6815b316e ("x86/mm: Change cpa_flush() to call flush_kernel_range() directly") Reported-by: syzbot+afec6555eef563c66c97@syzkaller.appspotmail.com Signed-off-by: Rik van Riel Signed-off-by: Dave Hansen Reviewed-by: Kiryl Shutsemau Link: https://lore.kernel.org/all/68e2ff90.050a0220.2c17c1.0038.GAE@google.com/ --- arch/x86/mm/pat/set_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index d2d54b8c4dbb..970981893c9b 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -446,7 +446,7 @@ static void cpa_flush(struct cpa_data *cpa, int cache) } start = fix_addr(__cpa_addr(cpa, 0)); - end = fix_addr(__cpa_addr(cpa, cpa->numpages)); + end = start + cpa->numpages * PAGE_SIZE; if (cpa->force_flush_all) end = TLB_FLUSH_ALL; From 83b0177a6c4889b3a6e865da5e21b2c9d97d0551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2025 15:43:04 +0200 Subject: [PATCH 0052/1075] x86/mm: Fix SMP ordering in switch_mm_irqs_off() Stephen noted that it is possible to not have an smp_mb() between the loaded_mm store and the tlb_gen load in switch_mm(), meaning the ordering against flush_tlb_mm_range() goes out the window, and it becomes possible for switch_mm() to not observe a recent tlb_gen update and fail to flush the TLBs. [ dhansen: merge conflict fixed by Ingo ] Fixes: 209954cbc7d0 ("x86/mm/tlb: Update mm_cpumask lazily") Reported-by: Stephen Dolan Closes: https://lore.kernel.org/all/CAHDw0oGd0B4=uuv8NGqbUQ_ZVmSheU2bN70e4QhFXWvuAZdt2w@mail.gmail.com/ Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Dave Hansen --- arch/x86/mm/tlb.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 39f80111e6f1..5d221709353e 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -911,11 +911,31 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next, * CR3 and cpu_tlbstate.loaded_mm are not all in sync. */ this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING); - barrier(); - /* Start receiving IPIs and then read tlb_gen (and LAM below) */ + /* + * Make sure this CPU is set in mm_cpumask() such that we'll + * receive invalidation IPIs. + * + * Rely on the smp_mb() implied by cpumask_set_cpu()'s atomic + * operation, or explicitly provide one. Such that: + * + * switch_mm_irqs_off() flush_tlb_mm_range() + * smp_store_release(loaded_mm, SWITCHING); atomic64_inc_return(tlb_gen) + * smp_mb(); // here // smp_mb() implied + * atomic64_read(tlb_gen); this_cpu_read(loaded_mm); + * + * we properly order against flush_tlb_mm_range(), where the + * loaded_mm load can happen in mative_flush_tlb_multi() -> + * should_flush_tlb(). + * + * This way switch_mm() must see the new tlb_gen or + * flush_tlb_mm_range() must see the new loaded_mm, or both. + */ if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next))) cpumask_set_cpu(cpu, mm_cpumask(next)); + else + smp_mb(); + next_tlb_gen = atomic64_read(&next->context.tlb_gen); ns = choose_new_asid(next, next_tlb_gen); From d6fc45100aa8c02be3ddd16fae569b84086c15a9 Mon Sep 17 00:00:00 2001 From: Hans Zhang <18255117159@163.com> Date: Fri, 10 Oct 2025 22:43:07 +0800 Subject: [PATCH 0053/1075] PCI: cadence: Search for MSI Capability with correct ID 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") incorrectly searched for the MSI-X Capability ID instead of the MSI Capability ID in cdns_pcie_ep_get_msi(). Search for PCI_CAP_ID_MSI, not PCI_CAP_ID_MSIX, to fix this problem. Fixes: 907912c1daa7 ("PCI: cadence: Use cdns_pcie_find_*capability() to avoid hardcoding offsets") Reported-by: Sasha Levin Closes: https://lore.kernel.org/r/aOfMk9BW8BH2P30V@laps/ Signed-off-by: Hans Zhang <18255117159@163.com> Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251010144307.12979-1-18255117159@163.com --- drivers/pci/controller/cadence/pcie-cadence-ep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence-ep.c b/drivers/pci/controller/cadence/pcie-cadence-ep.c index 1eac012a8226..c0e1194a936b 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-ep.c +++ b/drivers/pci/controller/cadence/pcie-cadence-ep.c @@ -255,7 +255,7 @@ static int cdns_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) u16 flags, mme; u8 cap; - cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSIX); + cap = cdns_pcie_find_capability(pcie, PCI_CAP_ID_MSI); fn = cdns_pcie_get_fn_from_vfn(pcie, fn, vfn); /* Validate that the MSI feature is actually enabled. */ From 8607edcd1748503f4f58e66ca0216170f260c79b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 14 Oct 2025 01:55:40 +0300 Subject: [PATCH 0054/1075] usb: xhci-pci: Fix USB2-only root hub registration A recent change to hide USB3 root hubs of USB2-only controllers broke registration of USB2 root hubs - allow_single_roothub is set too late, and by this time xhci_run() has already deferred root hub registration until after the shared HCD is added, which will never happen. This makes such controllers unusable, but testers didn't notice since they were only bothered by warnings about empty USB3 root hubs. The bug causes problems to other people who actually use such HCs and I was able to confirm it on an ordinary HC by patching to ignore USB3 ports. Setting allow_single_roothub during early setup fixes things. Reported-by: Arisa Snowbell Closes: https://lore.kernel.org/linux-usb/CABpa4MA9unucCoKtSdzJyOLjHNVy+Cwgz5AnAxPkKw6vuox1Nw@mail.gmail.com/ Reported-by: Michal Kubecek Closes: https://lore.kernel.org/linux-usb/lnb5bum7dnzkn3fc7gq6hwigslebo7o4ccflcvsc3lvdgnu7el@fvqpobbdoapl/ Fixes: 719de070f764 ("usb: xhci-pci: add support for hosts with zero USB3 ports") Tested-by: Arisa Snowbell Tested-by: Michal Kubecek Suggested-by: Mathias Nyman Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c8ab519f497..f67a4d956204 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -582,6 +582,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + xhci->allow_single_roothub = 1; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_acpi_rtd3_enable(pdev); @@ -637,7 +639,6 @@ int xhci_pci_common_probe(struct pci_dev *dev, const struct pci_device_id *id) xhci = hcd_to_xhci(hcd); xhci->reset = reset; - xhci->allow_single_roothub = 1; if (!xhci_has_one_roothub(xhci)) { xhci->shared_hcd = usb_create_shared_hcd(&xhci_pci_hc_driver, &dev->dev, pci_name(dev), hcd); From f3d12ec847b945d5d65846c85f062d07d5e73164 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 14 Oct 2025 01:55:41 +0300 Subject: [PATCH 0055/1075] xhci: dbc: fix bogus 1024 byte prefix if ttyDBC read races with stall event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DbC may add 1024 bogus bytes to the beginneing of the receiving endpoint if DbC hw triggers a STALL event before any Transfer Blocks (TRBs) for incoming data are queued, but driver handles the event after it queued the TRBs. This is possible as xHCI DbC hardware may trigger spurious STALL transfer events even if endpoint is empty. The STALL event contains a pointer to the stalled TRB, and "remaining" untransferred data length. As there are no TRBs queued yet the STALL event will just point to first TRB position of the empty ring, with '0' bytes remaining untransferred. DbC driver is polling for events, and may not handle the STALL event before /dev/ttyDBC0 is opened and incoming data TRBs are queued. The DbC event handler will now assume the first queued TRB (length 1024) has stalled with '0' bytes remaining untransferred, and copies the data This race situation can be practically mitigated by making sure the event handler handles all pending transfer events when DbC reaches configured state, and only then create dev/ttyDbC0, and start queueing transfers. The event handler can this way detect the STALL events on empty rings and discard them before any transfers are queued. This does in practice solve the issue, but still leaves a small possible gap for the race to trigger. We still need a way to distinguish spurious STALLs on empty rings with '0' bytes remaing, from actual STALL events with all bytes transmitted. Cc: stable Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Tested-by: Łukasz Bartosik Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 63edf2d8f245..023a8ec6f305 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -892,7 +892,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) dev_info(dbc->dev, "DbC configured\n"); portsc = readl(&dbc->regs->portsc); writel(portsc, &dbc->regs->portsc); - return EVT_GSER; + ret = EVT_GSER; + break; } return EVT_DONE; @@ -954,7 +955,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); - ret = EVT_XFER_DONE; + if (ret != EVT_GSER) + ret = EVT_XFER_DONE; break; default: break; From 2bbd38fcd29670e46c0fdb9cd0e90507a8a1bf6a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 14 Oct 2025 01:55:42 +0300 Subject: [PATCH 0056/1075] xhci: dbc: enable back DbC in resume if it was enabled before suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DbC is currently only enabled back if it's in configured state during suspend. If system is suspended after DbC is enabled, but before the device is properly enumerated by the host, then DbC would not be enabled back in resume. Always enable DbC back in resume if it's suspended in enabled, connected, or configured state Cc: stable Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Tested-by: Łukasz Bartosik Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 023a8ec6f305..ecda964e018a 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -1392,8 +1392,15 @@ int xhci_dbc_suspend(struct xhci_hcd *xhci) if (!dbc) return 0; - if (dbc->state == DS_CONFIGURED) + switch (dbc->state) { + case DS_ENABLED: + case DS_CONNECTED: + case DS_CONFIGURED: dbc->resume_required = 1; + break; + default: + break; + } xhci_dbc_stop(dbc); From 62685ab071de7c39499212bff19f1b5bc0148bc7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 1 Oct 2025 15:24:49 +0200 Subject: [PATCH 0057/1075] uprobe: Move arch_uprobe_optimize right after handlers execution It's less confusing to optimize uprobe right after handlers execution and before we do the check for changed ip register to avoid situations where changed ip register would skip uprobe optimization. Suggested-by: Linus Torvalds Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Acked-by: Oleg Nesterov --- kernel/events/uprobes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 8709c69118b5..f11ceb8be8c4 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -2765,6 +2765,9 @@ static void handle_swbp(struct pt_regs *regs) handler_chain(uprobe, regs); + /* Try to optimize after first hit. */ + arch_uprobe_optimize(&uprobe->arch, bp_vaddr); + /* * If user decided to take execution elsewhere, it makes little sense * to execute the original instruction, so let's skip it. @@ -2772,9 +2775,6 @@ static void handle_swbp(struct pt_regs *regs) if (instruction_pointer(regs) != bp_vaddr) goto out; - /* Try to optimize after first hit. */ - arch_uprobe_optimize(&uprobe->arch, bp_vaddr); - if (arch_uprobe_skip_sstep(&uprobe->arch, regs)) goto out; From ebfc8542ad62d066771e46c8aa30f5624b89cad8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:42 +0300 Subject: [PATCH 0058/1075] perf/core: Fix address filter match with backing files It was reported that Intel PT address filters do not work in Docker containers. That relates to the use of overlayfs. overlayfs records the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. In order for an address filter to match, it must compare to the user file inode. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for address filter matching. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record --buildid-mmap -e intel_pt//u --filter 'filter * @ /root/test/merged/cat' -- /root/test/merged/cat /proc/self/maps ... 55d61d246000-55d61d2e1000 r-xp 00018000 00:1a 3418 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.015 MB perf.data ] # perf buildid-cache --add /root/test/merged/cat Before: Address filter does not match so there are no control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 0 # perf script -D | grep 'TIP.PGE' | wc -l 0 # After: Address filter does match so there are control flow packets # perf script --itrace=e # perf script --itrace=b | wc -l 235 # perf script -D | grep 'TIP.PGE' | wc -l 57 # With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. Closes: https://lore.kernel.org/linux-perf-users/aBCwoq7w8ohBRQCh@fremen.lan Reported-by: Edd Barrett Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7541f6f85fcb..cd63ec84e386 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9492,7 +9492,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, if (!filter->path.dentry) return false; - if (d_inode(filter->path.dentry) != file_inode(file)) + if (d_inode(filter->path.dentry) != file_user_inode(file)) return false; if (filter->offset > offset + size) From 8818f507a9391019a3ec7c57b1a32e4b386e48a5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:43 +0300 Subject: [PATCH 0059/1075] perf/core: Fix MMAP event path names with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. Since commit def3ae83da02f ("fs: store real path instead of fake path in backing file f_path"), file_path() no longer returns the user file path when applied to a backing file. There is an existing helper file_user_path() for that situation. Use file_user_path() instead of file_path() to get the path for MMAP and MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e intel_pt//u -- /root/test/merged/cat /proc/self/maps ... 55b0ba399000-55b0ba434000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.060 MB perf.data ] # Before: File name is wrong (/cat), so decoding fails: # perf script --no-itrace --show-mmap-events cat 367 [016] 100.491492: PERF_RECORD_MMAP2 367/367: [0x55b0ba399000(0x9b000) @ 0x18000 00:02 3419 489959280]: r-xp /cat ... # perf script --itrace=e | wc -l Warning: 19 instruction trace errors 19 # After: File name is correct (/root/test/merged/cat), so decoding is ok: # perf script --no-itrace --show-mmap-events cat 364 [016] 72.153006: PERF_RECORD_MMAP2 364/364: [0x55ce4003d000(0x9b000) @ 0x18000 00:02 3419 3132534314]: r-xp /root/test/merged/cat # perf script --itrace=e # perf script --itrace=e | wc -l 0 # Fixes: def3ae83da02f ("fs: store real path instead of fake path in backing file f_path") Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index cd63ec84e386..7b5c2373a8d7 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9416,7 +9416,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) * need to add enough zero bytes after the string to handle * the 64bit alignment we do later. */ - name = file_path(file, buf, PATH_MAX - sizeof(u64)); + name = d_path(file_user_path(file), buf, PATH_MAX - sizeof(u64)); if (IS_ERR(name)) { name = "//toolong"; goto cpy_name; From fa4f4bae893fbce8a3edfff1ab7ece0c01dc1328 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Oct 2025 10:22:44 +0300 Subject: [PATCH 0060/1075] perf/core: Fix MMAP2 event device with backing files Some file systems like FUSE-based ones or overlayfs may record the backing file in struct vm_area_struct vm_file, instead of the user file that the user mmapped. That causes perf to misreport the device major/minor numbers of the file system of the file, and the generation of the file, and potentially other inode details. There is an existing helper file_user_inode() for that situation. Use file_user_inode() instead of file_inode() to get the inode for MMAP2 events. Example: Setup: # cd /root # mkdir test ; cd test ; mkdir lower upper work merged # cp `which cat` lower # mount -t overlay overlay -olowerdir=lower,upperdir=upper,workdir=work merged # perf record -e cycles:u -- /root/test/merged/cat /proc/self/maps ... 55b2c91d0000-55b2c926b000 r-xp 00018000 00:1a 3419 /root/test/merged/cat ... [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.004 MB perf.data (5 samples) ] # # stat /root/test/merged/cat File: /root/test/merged/cat Size: 1127792 Blocks: 2208 IO Block: 4096 regular file Device: 0,26 Inode: 3419 Links: 1 Access: (0755/-rwxr-xr-x) Uid: ( 0/ root) Gid: ( 0/ root) Access: 2025-09-08 12:23:59.453309624 +0000 Modify: 2025-09-08 12:23:59.454309624 +0000 Change: 2025-09-08 12:23:59.454309624 +0000 Birth: 2025-09-08 12:23:59.453309624 +0000 Before: Device reported 00:02 differs from stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 377 [-01] 243.078558: PERF_RECORD_MMAP2 377/377: [0x55b2c91d0000(0x9b000) @ 0x18000 00:02 3419 2068525940]: r-xp /root/test/merged/cat After: Device reported 00:1a is the same as stat output and /proc/self/maps # perf script --show-mmap-events | grep /root/test/merged/cat cat 362 [-01] 127.755167: PERF_RECORD_MMAP2 362/362: [0x55ba6e781000(0x9b000) @ 0x18000 00:1a 3419 0]: r-xp /root/test/merged/cat With respect to stable kernels, overlayfs mmap function ovl_mmap() was added in v4.19 but file_user_inode() was not added until v6.8 and never back-ported to stable kernels. FMODE_BACKING that it depends on was added in v6.5. This issue has gone largely unnoticed, so back-porting before v6.8 is probably not worth it, so put 6.8 as the stable kernel prerequisite version, although in practice the next long term kernel is 6.12. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Acked-by: Amir Goldstein Cc: stable@vger.kernel.org # 6.8 --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 7b5c2373a8d7..177e57c1a362 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9403,7 +9403,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) flags |= MAP_HUGETLB; if (file) { - struct inode *inode; + const struct inode *inode; dev_t dev; buf = kmalloc(PATH_MAX, GFP_KERNEL); @@ -9421,7 +9421,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) name = "//toolong"; goto cpy_name; } - inode = file_inode(vma->vm_file); + inode = file_user_inode(vma->vm_file); dev = inode->i_sb->s_dev; ino = inode->i_ino; gen = inode->i_generation; From ee6e44dfe6e50b4a5df853d933a96bdff5309e6e Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Fri, 10 Oct 2025 00:17:27 +0530 Subject: [PATCH 0061/1075] sched/deadline: Stop dl_server before CPU goes offline IBM CI tool reported kernel warning[1] when running a CPU removal operation through drmgr[2]. i.e "drmgr -c cpu -r -q 1" WARNING: CPU: 0 PID: 0 at kernel/sched/cpudeadline.c:219 cpudl_set+0x58/0x170 NIP [c0000000002b6ed8] cpudl_set+0x58/0x170 LR [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 Call Trace: [c000000002c2f8c0] init_stack+0x78c0/0x8000 (unreliable) [c0000000002b7cb8] dl_server_timer+0x168/0x2a0 [c00000000034df84] __hrtimer_run_queues+0x1a4/0x390 [c00000000034f624] hrtimer_interrupt+0x124/0x300 [c00000000002a230] timer_interrupt+0x140/0x320 Git bisects to: commit 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") This happens since: - dl_server hrtimer gets enqueued close to cpu offline, when kthread_park enqueues a fair task. - CPU goes offline and drmgr removes it from cpu_present_mask. - hrtimer fires and warning is hit. Fix it by stopping the dl_server before CPU is marked dead. [1]: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com/ [2]: https://github.com/ibm-power-utilities/powerpc-utils/tree/next/src/drmgr [sshegde: wrote the changelog and tested it] Fixes: 4ae8d9aa9f9d ("sched/deadline: Fix dl_server getting stuck") Closes: https://lore.kernel.org/all/8218e149-7718-4432-9312-f97297c352b9@linux.ibm.com Signed-off-by: Peter Zijlstra (Intel) Reported-by: Venkat Rao Bagalkote Signed-off-by: Shrikanth Hegde Signed-off-by: Peter Zijlstra (Intel) Tested-by: Marek Szyprowski Tested-by: Shrikanth Hegde --- kernel/sched/core.c | 2 ++ kernel/sched/deadline.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 198d2dd45f59..f1ebf67b48e2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8571,10 +8571,12 @@ int sched_cpu_dying(unsigned int cpu) sched_tick_stop(cpu); rq_lock_irqsave(rq, &rf); + update_rq_clock(rq); if (rq->nr_running != 1 || rq_has_pinned_tasks(rq)) { WARN(true, "Dying CPU not properly vacated!"); dump_rq_tasks(rq, KERN_WARNING); } + dl_server_stop(&rq->fair_server); rq_unlock_irqrestore(rq, &rf); calc_load_migrate(rq); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 615411a0a881..7b7671060bf9 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1582,6 +1582,9 @@ void dl_server_start(struct sched_dl_entity *dl_se) if (!dl_server(dl_se) || dl_se->dl_server_active) return; + if (WARN_ON_ONCE(!cpu_online(cpu_of(rq)))) + return; + dl_se->dl_server_active = 1; enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP); if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl)) From 17e3e88ed0b6318fde0d1c14df1a804711cab1b5 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 8 Oct 2025 15:12:14 +0200 Subject: [PATCH 0062/1075] sched/fair: Fix pelt lost idle time detection The check for some lost idle pelt time should be always done when pick_next_task_fair() fails to pick a task and not only when we call it from the fair fast-path. The case happens when the last running task on rq is a RT or DL task. When the latter goes to sleep and the /Sum of util_sum of the rq is at the max value, we don't account the lost of idle time whereas we should. Fixes: 67692435c411 ("sched: Rework pick_next_task() slow-path") Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bc0b7ce8a65d..cee1793e8277 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8920,21 +8920,21 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf return p; idle: - if (!rf) - return NULL; + if (rf) { + new_tasks = sched_balance_newidle(rq, rf); - new_tasks = sched_balance_newidle(rq, rf); + /* + * Because sched_balance_newidle() releases (and re-acquires) + * rq->lock, it is possible for any higher priority task to + * appear. In that case we must re-start the pick_next_entity() + * loop. + */ + if (new_tasks < 0) + return RETRY_TASK; - /* - * Because sched_balance_newidle() releases (and re-acquires) rq->lock, it is - * possible for any higher priority task to appear. In that case we - * must re-start the pick_next_entity() loop. - */ - if (new_tasks < 0) - return RETRY_TASK; - - if (new_tasks > 0) - goto again; + if (new_tasks > 0) + goto again; + } /* * rq is about to be idle, check if we need to update the From 512f0b7ebbc79d97d9485cd055902d439237e91f Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 14 Oct 2025 15:19:33 +0200 Subject: [PATCH 0063/1075] media: cx18: Fix invalid access to file * Sice commit 7b9eb53e8591 ("media: cx18: Access v4l2_fh from file") all ioctl handlers have been ported to operate on the file * first function argument. The cx18 DVB layer calls cx18_init_on_first_open() when the driver needs to start streaming. This function calls the s_input(), s_std() and s_frequency() ioctl handlers directly, but being called from the driver context, it doesn't have a valid file * to pass them. This causes the ioctl handlers to deference an invalid pointer. Fix this by moving the implementation of those ioctls to functions that take a cx18 pointer instead of a file pointer, and turn the V4L2 ioctl handlers into wrappers that get the cx18 from the file. When calling from cx18_init_on_first_open(), pass the cx18 pointer directly. This allows removing the fake fh in cx18_init_on_first_open(). The bug has been reported by Smatch: --> 1223 cx18_s_input(NULL, &fh, video_input); The patch adds a new dereference of "file" but some of the callers pass a NULL pointer. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aKL4OMWsESUdX8KQ@stanley.mountain/ Fixes: 7b9eb53e8591 ("media: cx18: Access v4l2_fh from file") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Tested-by: Hans Verkuil Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- drivers/media/pci/cx18/cx18-driver.c | 9 +++------ drivers/media/pci/cx18/cx18-ioctl.c | 30 ++++++++++++++++++---------- drivers/media/pci/cx18/cx18-ioctl.h | 8 +++++--- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/drivers/media/pci/cx18/cx18-driver.c b/drivers/media/pci/cx18/cx18-driver.c index b62fd12c93c1..74c59a94b2b0 100644 --- a/drivers/media/pci/cx18/cx18-driver.c +++ b/drivers/media/pci/cx18/cx18-driver.c @@ -1136,11 +1136,8 @@ int cx18_init_on_first_open(struct cx18 *cx) int video_input; int fw_retry_count = 3; struct v4l2_frequency vf; - struct cx18_open_id fh; v4l2_std_id std; - fh.cx = cx; - if (test_bit(CX18_F_I_FAILED, &cx->i_flags)) return -ENXIO; @@ -1220,14 +1217,14 @@ int cx18_init_on_first_open(struct cx18 *cx) video_input = cx->active_input; cx->active_input++; /* Force update of input */ - cx18_s_input(NULL, &fh, video_input); + cx18_do_s_input(cx, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ cx->std++; /* Force full standard initialization */ std = (cx->tuner_std == V4L2_STD_ALL) ? V4L2_STD_NTSC_M : cx->tuner_std; - cx18_s_std(NULL, &fh, std); - cx18_s_frequency(NULL, &fh, &vf); + cx18_do_s_std(cx, std); + cx18_do_s_frequency(cx, &vf); return 0; } diff --git a/drivers/media/pci/cx18/cx18-ioctl.c b/drivers/media/pci/cx18/cx18-ioctl.c index 0f3019739d03..0d676a57e24e 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.c +++ b/drivers/media/pci/cx18/cx18-ioctl.c @@ -521,10 +521,8 @@ static int cx18_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int cx18_s_input(struct file *file, void *fh, unsigned int inp) +int cx18_do_s_input(struct cx18 *cx, unsigned int inp) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; v4l2_std_id std = V4L2_STD_ALL; const struct cx18_card_video_input *card_input = cx->card->video_inputs + inp; @@ -558,6 +556,11 @@ int cx18_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int cx18_s_input(struct file *file, void *fh, unsigned int inp) +{ + return cx18_do_s_input(file2id(file)->cx, inp); +} + static int cx18_g_frequency(struct file *file, void *fh, struct v4l2_frequency *vf) { @@ -570,11 +573,8 @@ static int cx18_g_frequency(struct file *file, void *fh, return 0; } -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if (vf->tuner != 0) return -EINVAL; @@ -585,6 +585,12 @@ int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int cx18_s_frequency(struct file *file, void *fh, + const struct v4l2_frequency *vf) +{ + return cx18_do_s_frequency(file2id(file)->cx, vf); +} + static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct cx18 *cx = file2id(file)->cx; @@ -593,11 +599,8 @@ static int cx18_g_std(struct file *file, void *fh, v4l2_std_id *std) return 0; } -int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std) { - struct cx18_open_id *id = file2id(file); - struct cx18 *cx = id->cx; - if ((std & V4L2_STD_ALL) == 0) return -EINVAL; @@ -642,6 +645,11 @@ int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) return 0; } +static int cx18_s_std(struct file *file, void *fh, v4l2_std_id std) +{ + return cx18_do_s_std(file2id(file)->cx, std); +} + static int cx18_s_tuner(struct file *file, void *fh, const struct v4l2_tuner *vt) { struct cx18_open_id *id = file2id(file); diff --git a/drivers/media/pci/cx18/cx18-ioctl.h b/drivers/media/pci/cx18/cx18-ioctl.h index 97cd9f99e22d..42a8acd69735 100644 --- a/drivers/media/pci/cx18/cx18-ioctl.h +++ b/drivers/media/pci/cx18/cx18-ioctl.h @@ -12,6 +12,8 @@ u16 cx18_service2vbi(int type); void cx18_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 cx18_get_service_set(struct v4l2_sliced_vbi_format *fmt); void cx18_set_funcs(struct video_device *vdev); -int cx18_s_std(struct file *file, void *fh, v4l2_std_id std); -int cx18_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int cx18_s_input(struct file *file, void *fh, unsigned int inp); + +struct cx18; +int cx18_do_s_std(struct cx18 *cx, v4l2_std_id std); +int cx18_do_s_frequency(struct cx18 *cx, const struct v4l2_frequency *vf); +int cx18_do_s_input(struct cx18 *cx, unsigned int inp); From c90fad3e4157f943b6d5842d3ceb20b32e566986 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Tue, 14 Oct 2025 15:19:34 +0200 Subject: [PATCH 0064/1075] media: ivtv: Fix invalid access to file * Since commit 9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file") all ioctl handlers have been ported to operate on the file * first function argument. The ivtv DVB layer calls ivtv_init_on_first_open() when the driver needs to start streaming. This function calls the s_input() and s_frequency() ioctl handlers directly, but being called from the driver context, it doesn't have a valid file * to pass them. This causes the ioctl handlers to deference an invalid pointer. Fix this by moving the implementation of those ioctls to two helper functions. The ivtv_do_s_input() helper accepts a struct ivtv * as first argument, which is easily accessible in ivtv_init_on_first_open() as well as from the file * argument of the ioctl handler. The ivtv_s_frequency() takes an ivtv_stream * instead. The stream * can safely be accessed in ivtv_init_on_first_open() where it is hard-coded to the IVTV_ENC_STREAM_TYPE_MPG stream type, as well as from the ioctl handler as a valid stream type is associated to each open file handle depending on which video device node has been opened in the ivtv_open() file operation. The bug has been reported by Smatch. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/aKL4OMWsESUdX8KQ@stanley.mountain/ Fixes: 9ba9d11544f9 ("media: ivtv: Access v4l2_fh from file") Cc: stable@vger.kernel.org Reviewed-by: Laurent Pinchart Tested-by: Hans Verkuil Signed-off-by: Jacopo Mondi Signed-off-by: Hans Verkuil --- drivers/media/pci/ivtv/ivtv-driver.c | 11 ++++------- drivers/media/pci/ivtv/ivtv-ioctl.c | 22 +++++++++++++++++----- drivers/media/pci/ivtv/ivtv-ioctl.h | 6 ++++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/media/pci/ivtv/ivtv-driver.c b/drivers/media/pci/ivtv/ivtv-driver.c index 72a8f76a41f4..459eb6cc370c 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.c +++ b/drivers/media/pci/ivtv/ivtv-driver.c @@ -1247,15 +1247,12 @@ static int ivtv_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) int ivtv_init_on_first_open(struct ivtv *itv) { - struct v4l2_frequency vf; /* Needed to call ioctls later */ - struct ivtv_open_id fh; + struct ivtv_stream *s = &itv->streams[IVTV_ENC_STREAM_TYPE_MPG]; + struct v4l2_frequency vf; int fw_retry_count = 3; int video_input; - fh.itv = itv; - fh.type = IVTV_ENC_STREAM_TYPE_MPG; - if (test_bit(IVTV_F_I_FAILED, &itv->i_flags)) return -ENXIO; @@ -1297,13 +1294,13 @@ int ivtv_init_on_first_open(struct ivtv *itv) video_input = itv->active_input; itv->active_input++; /* Force update of input */ - ivtv_s_input(NULL, &fh, video_input); + ivtv_do_s_input(itv, video_input); /* Let the VIDIOC_S_STD ioctl do all the work, keeps the code in one place. */ itv->std++; /* Force full standard initialization */ itv->std_out = itv->std; - ivtv_s_frequency(NULL, &fh, &vf); + ivtv_do_s_frequency(s, &vf); if (itv->card->v4l2_capabilities & V4L2_CAP_VIDEO_OUTPUT) { /* Turn on the TV-out: ivtv_init_mpeg_decoder() initializes diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.c b/drivers/media/pci/ivtv/ivtv-ioctl.c index 84c73bd22f2d..8d5ea3aec06f 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.c +++ b/drivers/media/pci/ivtv/ivtv-ioctl.c @@ -974,9 +974,8 @@ static int ivtv_g_input(struct file *file, void *fh, unsigned int *i) return 0; } -int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp) { - struct ivtv *itv = file2id(file)->itv; v4l2_std_id std; int i; @@ -1017,6 +1016,11 @@ int ivtv_s_input(struct file *file, void *fh, unsigned int inp) return 0; } +static int ivtv_s_input(struct file *file, void *fh, unsigned int inp) +{ + return ivtv_do_s_input(file2id(file)->itv, inp); +} + static int ivtv_g_output(struct file *file, void *fh, unsigned int *i) { struct ivtv *itv = file2id(file)->itv; @@ -1065,10 +1069,9 @@ static int ivtv_g_frequency(struct file *file, void *fh, struct v4l2_frequency * return 0; } -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf) +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf) { - struct ivtv *itv = file2id(file)->itv; - struct ivtv_stream *s = &itv->streams[file2id(file)->type]; + struct ivtv *itv = s->itv; if (s->vdev.vfl_dir) return -ENOTTY; @@ -1082,6 +1085,15 @@ int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *v return 0; } +static int ivtv_s_frequency(struct file *file, void *fh, + const struct v4l2_frequency *vf) +{ + struct ivtv_open_id *id = file2id(file); + struct ivtv *itv = id->itv; + + return ivtv_do_s_frequency(&itv->streams[id->type], vf); +} + static int ivtv_g_std(struct file *file, void *fh, v4l2_std_id *std) { struct ivtv *itv = file2id(file)->itv; diff --git a/drivers/media/pci/ivtv/ivtv-ioctl.h b/drivers/media/pci/ivtv/ivtv-ioctl.h index 7f8c6f43d397..96ca7e2ef973 100644 --- a/drivers/media/pci/ivtv/ivtv-ioctl.h +++ b/drivers/media/pci/ivtv/ivtv-ioctl.h @@ -9,6 +9,8 @@ #ifndef IVTV_IOCTL_H #define IVTV_IOCTL_H +struct ivtv; + u16 ivtv_service2vbi(int type); void ivtv_expand_service_set(struct v4l2_sliced_vbi_format *fmt, int is_pal); u16 ivtv_get_service_set(struct v4l2_sliced_vbi_format *fmt); @@ -17,7 +19,7 @@ int ivtv_set_speed(struct ivtv *itv, int speed); void ivtv_set_funcs(struct video_device *vdev); void ivtv_s_std_enc(struct ivtv *itv, v4l2_std_id std); void ivtv_s_std_dec(struct ivtv *itv, v4l2_std_id std); -int ivtv_s_frequency(struct file *file, void *fh, const struct v4l2_frequency *vf); -int ivtv_s_input(struct file *file, void *fh, unsigned int inp); +int ivtv_do_s_frequency(struct ivtv_stream *s, const struct v4l2_frequency *vf); +int ivtv_do_s_input(struct ivtv *itv, unsigned int inp); #endif From 2e41e5a91a37202ff6743c3ae5329e106aeb1c6c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:53 -0700 Subject: [PATCH 0065/1075] cxl/acpi: Fix setup of memory resource in cxl_acpi_set_cache_size() In order to compare the resource against the HMAT memory target, the resource needs to be memory type. Change the DEFINE_RES() macro to DEFINE_RES_MEM() in order to set the correct resource type. hmat_get_extended_linear_cache_size() uses resource_contains() internally. This causes a regression for platforms with the extended linear cache enabled as the comparison always fails and the cache size is not set. User visible impact is that when 'cxl list' is issued, a CXL region with extended linear cache support will only report half the size of the actual size. And this also breaks MCE reporting of the memory region due to incorrect offset calculation for the memory. [dj: Fixup commit log suggested by djbw] [dj: Fixup stable address for cc] Fixes: 12b3d697c812 ("cxl: Remove core/acpi.c and cxl core dependency on ACPI") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d7a5539d07d4..bd2e282ca93a 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -348,7 +348,7 @@ static int cxl_acpi_set_cache_size(struct cxl_root_decoder *cxlrd) struct resource res; int nid, rc; - res = DEFINE_RES(start, size, 0); + res = DEFINE_RES_MEM(start, size); nid = phys_to_target_node(start); rc = hmat_get_extended_linear_cache_size(&res, nid, &cache_size); From 0f6f1982cb28abf1b8a3a8ba906e2c6ade6a70e8 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:54 -0700 Subject: [PATCH 0066/1075] cxl: Set range param for region_res_match_cxl_range() as const The function takes two parameters and compares them. The second parameter should be const since no modification should be done to it. Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index e14c1d305b22..858d4678628d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -839,7 +839,7 @@ static int match_free_decoder(struct device *dev, const void *data) } static bool region_res_match_cxl_range(const struct cxl_region_params *p, - struct range *range) + const struct range *range) { if (!p->res) return false; From f4d027921c811ff7fc16e4d03c6bbbf4347cf37a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 10 Oct 2025 13:57:55 -0700 Subject: [PATCH 0067/1075] cxl: Fix match_region_by_range() to use region_res_match_cxl_range() match_region_by_range() is not using the helper function that also takes extended linear cache size into account when comparing regions. This causes a x2 region to show up as 2 partial incomplete regions rather than a single CXL region with extended linear cache support. Replace the open coded compare logic with the proper helper function for comparison. User visible impact is that when 'cxl list' is issued, no activa CXL region(s) are shown. There may be multiple idle regions present. No actual active CXL region is present in the kernel. [dj: Fix stable address] Fixes: 0ec9849b6333 ("acpi/hmat / cxl: Add extended linear cache support for CXL") Cc: stable@vger.kernel.org Reviewed-by: Gregory Price Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 858d4678628d..57ed85e332d3 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3398,10 +3398,7 @@ static int match_region_by_range(struct device *dev, const void *data) p = &cxlr->params; guard(rwsem_read)(&cxl_rwsem.region); - if (p->res && p->res->start == r->start && p->res->end == r->end) - return 1; - - return 0; + return region_res_match_cxl_range(p, r); } static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr, From 257c4b03a2f7d8c15f79c79b09a561af9734f6c4 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 00:31:04 -0700 Subject: [PATCH 0068/1075] cxl/region: Use %pa printk format to emit resource_size_t KASAN reports a stack-out-of-bounds access in validate_region_offset() while running the cxl-poison.sh unit test because the printk format specifier, %pr format, is not a match for the resource_size_t type of the variables. %pr expects struct resource pointers and attempts to dereference the structure fields, reading beyond the bounds of the stack variables. Since these messages emit an 'A exceeds B' type of message, keep the resource_size_t's and use the %pa specifier to be architecture safe. BUG: KASAN: stack-out-of-bounds in resource_string.isra.0+0xe9a/0x1690 [] Read of size 8 at addr ffff88800a7afb40 by task bash/1397 ... [] The buggy address belongs to stack of task bash/1397 [] and is located at offset 56 in frame: [] validate_region_offset+0x0/0x1c0 [cxl_core] Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 57ed85e332d3..b06fee1978ba 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3663,14 +3663,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %pr\n", + "Offset %#llx is within extended linear cache %pa\n", offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", offset, ®ion_size); return -EINVAL; } From 05e63305c85c88141500f0a2fb02afcfba9396e1 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 13 Oct 2025 22:36:34 +0200 Subject: [PATCH 0069/1075] sched_ext: Fix scx_kick_pseqs corruption on concurrent scheduler loads If we load a BPF scheduler while another scheduler is already running, alloc_kick_pseqs() would be called again, overwriting the previously allocated arrays. Fix by moving the alloc_kick_pseqs() call after the scx_enable_state() check, ensuring that the arrays are only allocated when a scheduler can actually be loaded. Fixes: 14c1da3895a11 ("sched_ext: Allocate scx_kick_cpus_pnt_seqs lazily using kvzalloc()") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c645d47124e7..12c9c3595692 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4577,15 +4577,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) mutex_lock(&scx_enable_mutex); + if (scx_enable_state() != SCX_DISABLED) { + ret = -EBUSY; + goto err_unlock; + } + ret = alloc_kick_pseqs(); if (ret) goto err_unlock; - if (scx_enable_state() != SCX_DISABLED) { - ret = -EBUSY; - goto err_free_pseqs; - } - sch = scx_alloc_and_add_sched(ops); if (IS_ERR(sch)) { ret = PTR_ERR(sch); From 469276c06affdfd2d9e88c9f228bb81119ec1a20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 14 Oct 2025 19:36:02 +0300 Subject: [PATCH 0070/1075] PCI: Revert early bridge resource set up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit a43ac325c7cb ("PCI: Set up bridge resources earlier") moved bridge window resources set up earlier than before. The change was necessary to support another change that got pulled on the last minute due to breaking s390 and other systems. The presence of valid bridge window resources earlier than before allows pci_assign_unassigned_root_bus_resources() call from pci_host_probe() assign the bridge windows. Some host bridges, however, have to wait first for the link up event before they can enumerate successfully (see e.g. qcom_pcie_global_irq_thread()) and thus the bus has not been enumerated yet while calling pci_host_probe(). Calling pci_assign_unassigned_root_bus_resources() without results from enumeration can result in sizing bridge windows with too small sizes which cannot be later corrected after the enumeration has completed because bridge windows have become pinned in place by the other resources. Interestingly, it seems pci_read_bridge_bases() is not called at all in the problematic case and the bridge window resource type setup is done by pci_bridge_check_ranges() and sizing by the usual resource fitting logic. The root problem behind all this looks pretty generic. If resource fitting is called too early, the hotplug reservation and old size lower bounding cause the bridge windows to be assigned without children but with non-zero size, which leads to these pinning problems. As such, this can likely be solved on the general level but the solution does not look trivial. As the commit a43ac325c7cb ("PCI: Set up bridge resources earlier") was prequisite for other change that did not end up into kernel yet, revert it to resolve the resource assignment failures and give time to code and test a generic solution. Fixes: a43ac325c7cb ("PCI: Set up bridge resources earlier") Reported-by: Val Packett Link: https://lore.kernel.org/r/017ff8df-511c-4da8-b3cf-edf2cb7f1a67@packett.cool Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/df266709-a9b3-4fd8-af3a-c22eb3c9523a@roeck-us.net Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251014163602.17138-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/probe.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c83e75a0ec12..0ce98e18b5a8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -538,14 +538,10 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) } if (io) { bridge->io_window = 1; - pci_read_bridge_io(bridge, - pci_resource_n(bridge, PCI_BRIDGE_IO_WINDOW), - true); + pci_read_bridge_io(bridge, &res, true); } - pci_read_bridge_mmio(bridge, - pci_resource_n(bridge, PCI_BRIDGE_MEM_WINDOW), - true); + pci_read_bridge_mmio(bridge, &res, true); /* * DECchip 21050 pass 2 errata: the bridge may miss an address @@ -583,10 +579,7 @@ static void pci_read_bridge_windows(struct pci_dev *bridge) bridge->pref_64_window = 1; } - pci_read_bridge_mmio_pref(bridge, - pci_resource_n(bridge, - PCI_BRIDGE_PREF_MEM_WINDOW), - true); + pci_read_bridge_mmio_pref(bridge, &res, true); } void pci_read_bridge_bases(struct pci_bus *child) From a4bbb493a3247ef32f6191fd8b2a0657139f8e08 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 14:38:49 -0700 Subject: [PATCH 0071/1075] cxl/trace: Subtract to find an hpa_alias0 in cxl_poison events Traces of cxl_poison events include an hpa_alias0 field if the poison address is in a region configured with an ELC, Extended Linear Cache. Since the ELC always comes first in the region, the calculation needs to subtract the ELC size from the calculated HPA address. Fixes: 8c520c5f1e76 ("cxl: Add extended linear cache address alias emission for cxl events") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang --- drivers/cxl/core/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a53ec4798b12..a972e4ef1936 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(cxl_poison, __entry->hpa = cxl_dpa_to_hpa(cxlr, cxlmd, __entry->dpa); if (__entry->hpa != ULLONG_MAX && cxlr->params.cache_size) - __entry->hpa_alias0 = __entry->hpa + + __entry->hpa_alias0 = __entry->hpa - cxlr->params.cache_size; else __entry->hpa_alias0 = ULLONG_MAX; From a49c4d48c3b60926e6a8cec217bf95aa65388ecc Mon Sep 17 00:00:00 2001 From: Kurt Borja Date: Tue, 14 Oct 2025 05:07:27 -0500 Subject: [PATCH 0072/1075] platform/x86: alienware-wmi-wmax: Fix NULL pointer dereference in sleep handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Devices without the AWCC interface don't initialize `awcc`. Add a check before dereferencing it in sleep handlers. Cc: stable@vger.kernel.org Reported-by: Gal Hammer Tested-by: Gal Hammer Fixes: 07ac275981b1 ("platform/x86: alienware-wmi-wmax: Add support for manual fan control") Signed-off-by: Kurt Borja Link: https://patch.msgid.link/20251014-sleep-fix-v3-1-b5cb58da4638@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index 31f9643a6a3b..b106e8e407b3 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -1639,7 +1639,7 @@ static int wmax_wmi_probe(struct wmi_device *wdev, const void *context) static int wmax_wmi_suspend(struct device *dev) { - if (awcc->hwmon) + if (awcc && awcc->hwmon) awcc_hwmon_suspend(dev); return 0; @@ -1647,7 +1647,7 @@ static int wmax_wmi_suspend(struct device *dev) static int wmax_wmi_resume(struct device *dev) { - if (awcc->hwmon) + if (awcc && awcc->hwmon) awcc_hwmon_resume(dev); return 0; From a7b4747d8e0e7871c3d4971cded1dcc9af6af9e9 Mon Sep 17 00:00:00 2001 From: David Thompson Date: Mon, 13 Oct 2025 15:56:05 +0000 Subject: [PATCH 0073/1075] platform/mellanox: mlxbf-pmc: add sysfs_attr_init() to count_clock init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lock-related debug logic (CONFIG_LOCK_STAT) in the kernel is noting the following warning when the BlueField-3 SOC is booted: BUG: key ffff00008a3402a8 has not been registered! ------------[ cut here ]------------ DEBUG_LOCKS_WARN_ON(1) WARNING: CPU: 4 PID: 592 at kernel/locking/lockdep.c:4801 lockdep_init_map_type+0x1d4/0x2a0 Call trace: lockdep_init_map_type+0x1d4/0x2a0 __kernfs_create_file+0x84/0x140 sysfs_add_file_mode_ns+0xcc/0x1cc internal_create_group+0x110/0x3d4 internal_create_groups.part.0+0x54/0xcc sysfs_create_groups+0x24/0x40 device_add+0x6e8/0x93c device_register+0x28/0x40 __hwmon_device_register+0x4b0/0x8a0 devm_hwmon_device_register_with_groups+0x7c/0xe0 mlxbf_pmc_probe+0x1e8/0x3e0 [mlxbf_pmc] platform_probe+0x70/0x110 The mlxbf_pmc driver must call sysfs_attr_init() during the initialization of the "count_clock" data structure to avoid this warning. Fixes: 5efc800975d9 ("platform/mellanox: mlxbf-pmc: Add support for monitoring cycle count") Reviewed-by: Shravan Kumar Ramani Signed-off-by: David Thompson Link: https://patch.msgid.link/20251013155605.3589770-1-davthompson@nvidia.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/mellanox/mlxbf-pmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/mellanox/mlxbf-pmc.c b/drivers/platform/mellanox/mlxbf-pmc.c index 4776013e0764..16a2fd9fdd9b 100644 --- a/drivers/platform/mellanox/mlxbf-pmc.c +++ b/drivers/platform/mellanox/mlxbf-pmc.c @@ -2015,6 +2015,7 @@ static int mlxbf_pmc_init_perftype_counter(struct device *dev, unsigned int blk_ if (pmc->block[blk_num].type == MLXBF_PMC_TYPE_CRSPACE) { /* Program crspace counters to count clock cycles using "count_clock" sysfs */ attr = &pmc->block[blk_num].attr_count_clock; + sysfs_attr_init(&attr->dev_attr.attr); attr->dev_attr.attr.mode = 0644; attr->dev_attr.show = mlxbf_pmc_count_clock_show; attr->dev_attr.store = mlxbf_pmc_count_clock_store; From f4f868baf292550acbfc35839213de9da0d9e8ab Mon Sep 17 00:00:00 2001 From: Denis Benato Date: Fri, 3 Oct 2025 20:49:49 +0200 Subject: [PATCH 0074/1075] MAINTAINERS: add Denis Benato as maintainer for asus notebooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add myself as maintainer for "ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS" as suggested by Hans de Goede and Armin Wolf. Signed-off-by: Denis Benato Link: https://lore.kernel.org/all/8128cd6b-50e3-464c-90c2-781f61c3963e@gmail.com Reviewed-by: Hans de Goede Acked-by: Luke Jones Reviewed-by: Mario Limonciello (AMD) Link: https://patch.msgid.link/20251003184949.1083030-1-benato.denis96@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..832f3279ea83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3841,6 +3841,7 @@ F: drivers/hwmon/asus-ec-sensors.c ASUS NOTEBOOKS AND EEEPC ACPI/WMI EXTRAS DRIVERS M: Corentin Chary M: Luke D. Jones +M: Denis Benato L: platform-driver-x86@vger.kernel.org S: Maintained W: https://asus-linux.org/ From 34cbd6e07fddf36e186c8bf26a456fb7f50af44e Mon Sep 17 00:00:00 2001 From: tr1x_em Date: Thu, 25 Sep 2025 09:10:03 +0530 Subject: [PATCH 0075/1075] platform/x86: alienware-wmi-wmax: Add AWCC support to Dell G15 5530 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes alienware-wmi load on G15 5530 by default Cc: stable@vger.kernel.org Signed-off-by: Saumya Reviewed-by: Kurt Borja Link: https://patch.msgid.link/20250925034010.31414-1-admin@trix.is-a.dev Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/alienware-wmi-wmax.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/platform/x86/dell/alienware-wmi-wmax.c b/drivers/platform/x86/dell/alienware-wmi-wmax.c index b106e8e407b3..f417dcc9af35 100644 --- a/drivers/platform/x86/dell/alienware-wmi-wmax.c +++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c @@ -209,6 +209,14 @@ static const struct dmi_system_id awcc_dmi_table[] __initconst = { }, .driver_data = &g_series_quirks, }, + { + .ident = "Dell Inc. G15 5530", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5530"), + }, + .driver_data = &g_series_quirks, + }, { .ident = "Dell Inc. G16 7630", .matches = { From 6b6e03106163458716c47df2baa9ad08ed4ddb0e Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 15 Oct 2025 15:36:59 +0800 Subject: [PATCH 0076/1075] spi: amlogic: fix spifc build error There is an error building when Compiler version: gcc (GCC) 14.3.0 Assembler version: GNU assembler (GNU Binutils) 2.44 " Error log: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/arm/probes/kprobes/test-kprobes.o ERROR: modpost: "__ffsdi2" [drivers/spi/spi-amlogic-spifc-a4.ko] undefined! " Use __ffs API instead of __bf_shf to be safer. Reported-by: Guenter Roeck Closes: https://lore.kernel.org/all/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Fixes: 4670db6f32e9 ("spi: amlogic: add driver for Amlogic SPI Flash Controller") Signed-off-by: Xianwei Zhao Link: https://patch.msgid.link/20251015-fix-spifc-a4-v1-1-08e0900e5b7e@amlogic.com Signed-off-by: Mark Brown --- drivers/spi/spi-amlogic-spifc-a4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 4338d00e56a6..35a7c4965e11 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c @@ -286,7 +286,7 @@ static int aml_sfc_set_bus_width(struct aml_sfc *sfc, u8 buswidth, u32 mask) for (i = 0; i <= LANE_MAX; i++) { if (buswidth == 1 << i) { - conf = i << __bf_shf(mask); + conf = i << __ffs(mask); return regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, mask, conf); } @@ -566,7 +566,7 @@ static int aml_sfc_raw_io_op(struct aml_sfc *sfc, const struct spi_mem_op *op) if (!op->data.nbytes) goto end_xfer; - conf = (op->data.nbytes >> RAW_SIZE_BW) << __bf_shf(RAW_EXT_SIZE); + conf = (op->data.nbytes >> RAW_SIZE_BW) << __ffs(RAW_EXT_SIZE); ret = regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, RAW_EXT_SIZE, conf); if (ret) goto err_out; From 85d7dda5a9f665ea579741ec873a8841f37e8943 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Tue, 23 Sep 2025 10:29:29 -0500 Subject: [PATCH 0077/1075] cpufreq/amd-pstate: Fix a regression leading to EPP 0 after hibernate After resuming from S4, all CPUs except the boot CPU have the wrong EPP hint programmed. This is because when the CPUs were offlined the EPP value was reset to 0. This is a similar problem as fixed by commit ba3319e590571 ("cpufreq/amd-pstate: Fix a regression leading to EPP 0 after resume") and the solution is also similar. When offlining rather than reset the values to zero, reset them to match those chosen by the policy. When the CPUs are onlined again these values will be restored. Closes: https://community.frame.work/t/increased-power-usage-after-resuming-from-suspend-on-ryzen-7040-kernel-6-15-regression/74531/20?u=mario_limonciello Fixes: 608a76b65288 ("cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option") Reviewed-by: Gautham R. Shenoy Signed-off-by: Mario Limonciello (AMD) --- drivers/cpufreq/amd-pstate.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 298e92d8cc03..b44f0f7a5ba1 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -1614,7 +1614,11 @@ static int amd_pstate_cpu_offline(struct cpufreq_policy *policy) * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the * limits, epp and desired perf will get reset to the cached values in cpudata struct */ - return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false); + return amd_pstate_update_perf(policy, perf.bios_min_perf, + FIELD_GET(AMD_CPPC_DES_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_MAX_PERF_MASK, cpudata->cppc_req_cached), + FIELD_GET(AMD_CPPC_EPP_PERF_MASK, cpudata->cppc_req_cached), + false); } static int amd_pstate_suspend(struct cpufreq_policy *policy) From 2290ab43b9d8eafb8046387f10a8dfa2b030ba46 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:44 +0100 Subject: [PATCH 0078/1075] firmware: arm_scmi: Account for failed debug initialization When the SCMI debug subsystem fails to initialize, the related debug root will be missing, and the underlying descriptor will be NULL. Handle this fault condition in the SCMI debug helpers that maintain metrics counters. Fixes: 0b3d48c4726e ("firmware: arm_scmi: Track basic SCMI communication debug metrics") Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 24 ++++++++++++++-- drivers/firmware/arm_scmi/driver.c | 44 ++++++++++-------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 07b9e629276d..21c0b95027c6 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -309,10 +309,28 @@ enum debug_counters { SCMI_DEBUG_COUNTERS_LAST }; -static inline void scmi_inc_count(atomic_t *arr, int stat) +/** + * struct scmi_debug_info - Debug common info + * @top_dentry: A reference to the top debugfs dentry + * @name: Name of this SCMI instance + * @type: Type of this SCMI instance + * @is_atomic: Flag to state if the transport of this instance is atomic + * @counters: An array of atomic_c's used for tracking statistics (if enabled) + */ +struct scmi_debug_info { + struct dentry *top_dentry; + const char *name; + const char *type; + bool is_atomic; + atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; +}; + +static inline void scmi_inc_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_inc(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_inc(&dbg->counters[stat]); + } } static inline void scmi_dec_count(atomic_t *arr, int stat) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index bd56a877fdfc..56419285c0bf 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -115,22 +115,6 @@ struct scmi_protocol_instance { #define ph_to_pi(h) container_of(h, struct scmi_protocol_instance, ph) -/** - * struct scmi_debug_info - Debug common info - * @top_dentry: A reference to the top debugfs dentry - * @name: Name of this SCMI instance - * @type: Type of this SCMI instance - * @is_atomic: Flag to state if the transport of this instance is atomic - * @counters: An array of atomic_c's used for tracking statistics (if enabled) - */ -struct scmi_debug_info { - struct dentry *top_dentry; - const char *name; - const char *type; - bool is_atomic; - atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; -}; - /** * struct scmi_info - Structure representing a SCMI instance * @@ -1034,7 +1018,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) spin_unlock_irqrestore(&minfo->xfer_lock, flags); scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED); - scmi_inc_count(info->dbg->counters, ERR_MSG_UNEXPECTED); + scmi_inc_count(info->dbg, ERR_MSG_UNEXPECTED); return xfer; } @@ -1062,7 +1046,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) msg_type, xfer_id, msg_hdr, xfer->state); scmi_bad_message_trace(cinfo, msg_hdr, MSG_INVALID); - scmi_inc_count(info->dbg->counters, ERR_MSG_INVALID); + scmi_inc_count(info->dbg, ERR_MSG_INVALID); /* On error the refcount incremented above has to be dropped */ __scmi_xfer_put(minfo, xfer); @@ -1107,7 +1091,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, PTR_ERR(xfer)); scmi_bad_message_trace(cinfo, msg_hdr, MSG_NOMEM); - scmi_inc_count(info->dbg->counters, ERR_MSG_NOMEM); + scmi_inc_count(info->dbg, ERR_MSG_NOMEM); scmi_clear_channel(info, cinfo); return; @@ -1123,7 +1107,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "NOTI", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, NOTIFICATION_OK); + scmi_inc_count(info->dbg, NOTIFICATION_OK); scmi_notify(cinfo->handle, xfer->hdr.protocol_id, xfer->hdr.id, xfer->rx.buf, xfer->rx.len, ts); @@ -1183,10 +1167,10 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) { scmi_clear_channel(info, cinfo); complete(xfer->async_done); - scmi_inc_count(info->dbg->counters, DELAYED_RESPONSE_OK); + scmi_inc_count(info->dbg, DELAYED_RESPONSE_OK); } else { complete(&xfer->done); - scmi_inc_count(info->dbg->counters, RESPONSE_OK); + scmi_inc_count(info->dbg, RESPONSE_OK); } if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { @@ -1296,7 +1280,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_POLLED_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_POLLED_TIMEOUT); } } @@ -1321,7 +1305,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "RESP" : "resp", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, RESPONSE_POLLED_OK); + scmi_inc_count(info->dbg, RESPONSE_POLLED_OK); if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { scmi_raw_message_report(info->raw, xfer, @@ -1336,7 +1320,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, dev_err(dev, "timed out in resp(caller: %pS)\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_TIMEOUT); } } @@ -1420,13 +1404,13 @@ static int do_xfer(const struct scmi_protocol_handle *ph, !is_transport_polling_capable(info->desc)) { dev_warn_once(dev, "Polling mode is not supported by transport.\n"); - scmi_inc_count(info->dbg->counters, SENT_FAIL_POLLING_UNSUPPORTED); + scmi_inc_count(info->dbg, SENT_FAIL_POLLING_UNSUPPORTED); return -EINVAL; } cinfo = idr_find(&info->tx_idr, pi->proto->id); if (unlikely(!cinfo)) { - scmi_inc_count(info->dbg->counters, SENT_FAIL_CHANNEL_NOT_FOUND); + scmi_inc_count(info->dbg, SENT_FAIL_CHANNEL_NOT_FOUND); return -EINVAL; } /* True ONLY if also supported by transport. */ @@ -1461,19 +1445,19 @@ static int do_xfer(const struct scmi_protocol_handle *ph, ret = info->desc->ops->send_message(cinfo, xfer); if (ret < 0) { dev_dbg(dev, "Failed to send message %d\n", ret); - scmi_inc_count(info->dbg->counters, SENT_FAIL); + scmi_inc_count(info->dbg, SENT_FAIL); return ret; } trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "CMND", xfer->hdr.seq, xfer->hdr.status, xfer->tx.buf, xfer->tx.len); - scmi_inc_count(info->dbg->counters, SENT_OK); + scmi_inc_count(info->dbg, SENT_OK); ret = scmi_wait_for_message_response(cinfo, xfer); if (!ret && xfer->hdr.status) { ret = scmi_to_linux_errno(xfer->hdr.status); - scmi_inc_count(info->dbg->counters, ERR_PROTOCOL); + scmi_inc_count(info->dbg, ERR_PROTOCOL); } if (info->desc->ops->mark_txdone) From 289ce7e9a5e1a52ac7e522a3e389dc16be08d7a4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:45 +0100 Subject: [PATCH 0079/1075] include: trace: Fix inflight count helper on failed initialization Add a check to the scmi_inflight_count() helper to handle the case when the SCMI debug subsystem fails to initialize. Fixes: f8e656382b4a ("include: trace: Add tracepoint support for inflight xfer count") Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-2-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 8 +++++--- drivers/firmware/arm_scmi/driver.c | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 21c0b95027c6..7c35c95fddba 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -333,10 +333,12 @@ static inline void scmi_inc_count(struct scmi_debug_info *dbg, int stat) } } -static inline void scmi_dec_count(atomic_t *arr, int stat) +static inline void scmi_dec_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_dec(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_dec(&dbg->counters[stat]); + } } enum scmi_bad_msg { diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 56419285c0bf..1cd15412024c 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -594,7 +594,7 @@ scmi_xfer_inflight_register_unlocked(struct scmi_xfer *xfer, /* Set in-flight */ set_bit(xfer->hdr.seq, minfo->xfer_alloc_table); hash_add(minfo->pending_xfers, &xfer->node, xfer->hdr.seq); - scmi_inc_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_inc_count(info->dbg, XFERS_INFLIGHT); xfer->pending = true; } @@ -803,7 +803,7 @@ __scmi_xfer_put(struct scmi_xfers_info *minfo, struct scmi_xfer *xfer) hash_del(&xfer->node); xfer->pending = false; - scmi_dec_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_dec_count(info->dbg, XFERS_INFLIGHT); } hlist_add_head(&xfer->node, &minfo->free_xfers); } @@ -3407,6 +3407,9 @@ int scmi_inflight_count(const struct scmi_handle *handle) if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { struct scmi_info *info = handle_to_scmi_info(handle); + if (!info->dbg) + return 0; + return atomic_read(&info->dbg->counters[XFERS_INFLIGHT]); } else { return 0; From 092b9e2ce6dd63d2f36822751a51957412706986 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:46 +0100 Subject: [PATCH 0080/1075] firmware: arm_scmi: Skip RAW initialization on failure Avoid attempting to initialize RAW mode when the debug subsystem itself has failed to initialize, since doing so is pointless and emits misleading error messages. Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-3-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1cd15412024c..eb46694cb14b 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -3028,9 +3028,6 @@ static int scmi_debugfs_raw_mode_setup(struct scmi_info *info) u8 channels[SCMI_MAX_CHANNELS] = {}; DECLARE_BITMAP(protos, SCMI_MAX_CHANNELS) = {}; - if (!info->dbg) - return -EINVAL; - /* Enumerate all channels to collect their ids */ idr_for_each_entry(&info->tx_idr, cinfo, id) { /* @@ -3202,7 +3199,7 @@ static int scmi_probe(struct platform_device *pdev) if (!info->dbg) dev_warn(dev, "Failed to setup SCMI debugfs.\n"); - if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { + if (info->dbg && IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { ret = scmi_debugfs_raw_mode_setup(info); if (!coex) { if (ret) From 20b93a0088a595bceed4a026d527cbbac4e876c5 Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Wed, 8 Oct 2025 12:10:57 +0300 Subject: [PATCH 0081/1075] firmware: arm_scmi: Fix premature SCMI_XFER_FLAG_IS_RAW clearing in raw mode The SCMI_XFER_FLAG_IS_RAW flag was being cleared prematurely in scmi_xfer_raw_put() before the transfer completion was properly acknowledged by the raw message handlers. Move the clearing of SCMI_XFER_FLAG_IS_RAW and SCMI_XFER_FLAG_CHAN_SET from scmi_xfer_raw_put() to __scmi_xfer_put() to ensure the flags remain set throughout the entire raw message processing pipeline until the transfer is returned to the free pool. Fixes: 3095a3e25d8f ("firmware: arm_scmi: Add xfer helpers to provide raw access") Suggested-by: Cristian Marussi Signed-off-by: Artem Shimko Reviewed-by: Cristian Marussi Message-Id: <20251008091057.1969260-1-a.shimko.dev@gmail.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index eb46694cb14b..5caa9191a8d1 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -805,6 +805,7 @@ __scmi_xfer_put(struct scmi_xfers_info *minfo, struct scmi_xfer *xfer) scmi_dec_count(info->dbg, XFERS_INFLIGHT); } + xfer->flags = 0; hlist_add_head(&xfer->node, &minfo->free_xfers); } spin_unlock_irqrestore(&minfo->xfer_lock, flags); @@ -823,8 +824,6 @@ void scmi_xfer_raw_put(const struct scmi_handle *handle, struct scmi_xfer *xfer) { struct scmi_info *info = handle_to_scmi_info(handle); - xfer->flags &= ~SCMI_XFER_FLAG_IS_RAW; - xfer->flags &= ~SCMI_XFER_FLAG_CHAN_SET; return __scmi_xfer_put(&info->tx_minfo, xfer); } From 3f9c60f4d3cc3e7f4dd7cac05011ea06d512050f Mon Sep 17 00:00:00 2001 From: Sebastian Chlad Date: Wed, 15 Oct 2025 12:33:56 +0200 Subject: [PATCH 0082/1075] selftests: cgroup: add values_close_report helper Some cgroup selftests, such as test_cpu, occasionally fail by a very small margin and if run in the CI context, it is useful to have detailed diagnostic output to understand the deviation. Introduce a values_close_report() helper which performs the same comparison as values_close(), but prints detailed information when the values differ beyond the allowed tolerance. Signed-off-by: Sebastian Chlad Signed-off-by: Tejun Heo --- .../cgroup/lib/include/cgroup_util.h | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h index 9dc90a1b386d..7ab2824ed7b5 100644 --- a/tools/testing/selftests/cgroup/lib/include/cgroup_util.h +++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h @@ -25,6 +25,26 @@ static inline int values_close(long a, long b, int err) return labs(a - b) <= (a + b) / 100 * err; } +/* + * Checks if two given values differ by less than err% of their sum and assert + * with detailed debug info if not. + */ +static inline int values_close_report(long a, long b, int err) +{ + long diff = labs(a - b); + long limit = (a + b) / 100 * err; + double actual_err = (a + b) ? (100.0 * diff / (a + b)) : 0.0; + int close = diff <= limit; + + if (!close) + fprintf(stderr, + "[FAIL] actual=%ld expected=%ld | diff=%ld | limit=%ld | " + "tolerance=%d%% | actual_error=%.2f%%\n", + a, b, diff, limit, err, actual_err); + + return close; +} + extern ssize_t read_text(const char *path, char *buf, size_t max_len); extern ssize_t write_text(const char *path, char *buf, ssize_t len); From 4cdde87d723a0552f475c8c6b0db472a6945125f Mon Sep 17 00:00:00 2001 From: Sebastian Chlad Date: Wed, 15 Oct 2025 12:33:57 +0200 Subject: [PATCH 0083/1075] selftests: cgroup: Use values_close_report in test_cpu Convert test_cpu to use the newly added values_close_report() helper to print detailed diagnostics when a tolerance check fails. This provides clearer insight into deviations while run in the CI. Signed-off-by: Sebastian Chlad Signed-off-by: Tejun Heo --- tools/testing/selftests/cgroup/test_cpu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 2a60e6c41940..d54e2317efff 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -219,7 +219,7 @@ static int test_cpucg_stats(const char *root) if (user_usec <= 0) goto cleanup; - if (!values_close(usage_usec, expected_usage_usec, 1)) + if (!values_close_report(usage_usec, expected_usage_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -291,7 +291,7 @@ static int test_cpucg_nice(const char *root) user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); - if (!values_close(nice_usec, expected_nice_usec, 1)) + if (!values_close_report(nice_usec, expected_nice_usec, 1)) goto cleanup; ret = KSFT_PASS; @@ -404,7 +404,7 @@ overprovision_validate(const struct cpu_hogger *children, int num_children) goto cleanup; delta = children[i + 1].usage - children[i].usage; - if (!values_close(delta, children[0].usage, 35)) + if (!values_close_report(delta, children[0].usage, 35)) goto cleanup; } @@ -444,7 +444,7 @@ underprovision_validate(const struct cpu_hogger *children, int num_children) int ret = KSFT_FAIL, i; for (i = 0; i < num_children - 1; i++) { - if (!values_close(children[i + 1].usage, children[0].usage, 15)) + if (!values_close_report(children[i + 1].usage, children[0].usage, 15)) goto cleanup; } @@ -573,16 +573,16 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned) nested_leaf_usage = leaf[1].usage + leaf[2].usage; if (overprovisioned) { - if (!values_close(leaf[0].usage, nested_leaf_usage, 15)) + if (!values_close_report(leaf[0].usage, nested_leaf_usage, 15)) goto cleanup; - } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15)) + } else if (!values_close_report(leaf[0].usage * 2, nested_leaf_usage, 15)) goto cleanup; child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec"); if (child_usage <= 0) goto cleanup; - if (!values_close(child_usage, nested_leaf_usage, 1)) + if (!values_close_report(child_usage, nested_leaf_usage, 1)) goto cleanup; ret = KSFT_PASS; @@ -691,7 +691,7 @@ static int test_cpucg_max(const char *root) expected_usage_usec = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (!values_close(usage_usec, expected_usage_usec, 10)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; @@ -762,7 +762,7 @@ static int test_cpucg_max_nested(const char *root) expected_usage_usec = n_periods * quota_usec + MIN(remainder_usec, quota_usec); - if (!values_close(usage_usec, expected_usage_usec, 10)) + if (!values_close_report(usage_usec, expected_usage_usec, 10)) goto cleanup; ret = KSFT_PASS; From e6416c2dfe23c9a6fec881fda22ebb9ae486cfc5 Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Sat, 11 Oct 2025 00:59:58 +0800 Subject: [PATCH 0084/1075] x86/CPU/AMD: Prevent reset reasons from being retained across reboot The S5_RESET_STATUS register is parsed on boot and printed to kmsg. However, this could sometimes be misleading and lead to users wasting a lot of time on meaningless debugging for two reasons: * Some bits are never cleared by hardware. It's the software's responsibility to clear them as per the Processor Programming Reference (see [1]). * Some rare hardware-initiated platform resets do not update the register at all. In both cases, a previous reboot could leave its trace in the register, resulting in users seeing unrelated reboot reasons while debugging random reboots afterward. Write the read value back to the register in order to clear all reason bits since they are write-1-to-clear while the others must be preserved. [1]: https://bugzilla.kernel.org/show_bug.cgi?id=206537#attach_303991 [ bp: Massage commit message. ] Fixes: ab8131028710 ("x86/CPU/AMD: Print the reason for the last reset") Signed-off-by: Rong Zhang Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Yazen Ghannam Cc: Link: https://lore.kernel.org/all/20250913144245.23237-1-i@rong.moe/ --- arch/x86/kernel/cpu/amd.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 5398db4dedb4..ccaa51ce63f6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1355,11 +1355,23 @@ static __init int print_s5_reset_status_mmio(void) return 0; value = ioread32(addr); - iounmap(addr); /* Value with "all bits set" is an error response and should be ignored. */ - if (value == U32_MAX) + if (value == U32_MAX) { + iounmap(addr); return 0; + } + + /* + * Clear all reason bits so they won't be retained if the next reset + * does not update the register. Besides, some bits are never cleared by + * hardware so it's software's responsibility to clear them. + * + * Writing the value back effectively clears all reason bits as they are + * write-1-to-clear. + */ + iowrite32(value, addr); + iounmap(addr); for (i = 0; i < ARRAY_SIZE(s5_reset_reason_txt); i++) { if (!(value & BIT(i))) From c97513cddcfc235f2522617980838e500af21d01 Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 9 Sep 2025 22:52:43 +0800 Subject: [PATCH 0085/1075] hung_task: fix warnings caused by unaligned lock pointers The blocker tracking mechanism assumes that lock pointers are at least 4-byte aligned to use their lower bits for type encoding. However, as reported by Eero Tamminen, some architectures like m68k only guarantee 2-byte alignment of 32-bit values. This breaks the assumption and causes two related WARN_ON_ONCE checks to trigger. To fix this, the runtime checks are adjusted to silently ignore any lock that is not 4-byte aligned, effectively disabling the feature in such cases and avoiding the related warnings. Thanks to Geert Uytterhoeven for bisecting! Link: https://lkml.kernel.org/r/20250909145243.17119-1-lance.yang@linux.dev Fixes: e711faaafbe5 ("hung_task: replace blocker_mutex with encoded blocker") Signed-off-by: Lance Yang Reported-by: Eero Tamminen Closes: https://lore.kernel.org/lkml/CAMuHMdW7Ab13DdGs2acMQcix5ObJK0O2dG_Fxzr8_g58Rc1_0g@mail.gmail.com Reviewed-by: Masami Hiramatsu (Google) Cc: John Paul Adrian Glaubitz Cc: Anna Schumaker Cc: Boqun Feng Cc: Finn Thain Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Joel Granados Cc: John Stultz Cc: Kent Overstreet Cc: Lance Yang Cc: Mingzhe Yang Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: Tomasz Figa Cc: Waiman Long Cc: Will Deacon Cc: Yongliang Gao Cc: Signed-off-by: Andrew Morton --- include/linux/hung_task.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/hung_task.h b/include/linux/hung_task.h index 34e615c76ca5..c4403eeb7144 100644 --- a/include/linux/hung_task.h +++ b/include/linux/hung_task.h @@ -20,6 +20,10 @@ * always zero. So we can use these bits to encode the specific blocking * type. * + * Note that on architectures where this is not guaranteed, or for any + * unaligned lock, this tracking mechanism is silently skipped for that + * lock. + * * Type encoding: * 00 - Blocked on mutex (BLOCKER_TYPE_MUTEX) * 01 - Blocked on semaphore (BLOCKER_TYPE_SEM) @@ -45,7 +49,7 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) * If the lock pointer matches the BLOCKER_TYPE_MASK, return * without writing anything. */ - if (WARN_ON_ONCE(lock_ptr & BLOCKER_TYPE_MASK)) + if (lock_ptr & BLOCKER_TYPE_MASK) return; WRITE_ONCE(current->blocker, lock_ptr | type); @@ -53,8 +57,6 @@ static inline void hung_task_set_blocker(void *lock, unsigned long type) static inline void hung_task_clear_blocker(void) { - WARN_ON_ONCE(!READ_ONCE(current->blocker)); - WRITE_ONCE(current->blocker, 0UL); } From f0c5118ebb0eb7e4fd6f0d2ace3315ca141b317f Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Oct 2025 13:14:54 -0700 Subject: [PATCH 0086/1075] mm/damon/sysfs: catch commit test ctx alloc failure Patch series "mm/damon/sysfs: fix commit test damon_ctx [de]allocation". DAMON sysfs interface dynamically allocates and uses a damon_ctx object for testing if given inputs for online DAMON parameters update is valid. The object is being used without an allocation failure check, and leaked when the test succeeds. Fix the two bugs. This patch (of 2): The damon_ctx for testing online DAMON parameters commit inputs is used without its allocation failure check. This could result in an invalid memory access. Fix it by directly returning an error when the allocation failed. Link: https://lkml.kernel.org/r/20251003201455.41448-1-sj@kernel.org Link: https://lkml.kernel.org/r/20251003201455.41448-2-sj@kernel.org Fixes: 4c9ea539ad59 ("mm/damon/sysfs: validate user inputs from damon_sysfs_commit_input()") Signed-off-by: SeongJae Park Cc: [6.15+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 2fc722f998f8..703f55a91b3c 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1473,6 +1473,8 @@ static int damon_sysfs_commit_input(void *data) if (IS_ERR(param_ctx)) return PTR_ERR(param_ctx); test_ctx = damon_new_ctx(); + if (!test_ctx) + return -ENOMEM; err = damon_commit_ctx(test_ctx, param_ctx); if (err) { damon_destroy_ctx(test_ctx); From 139e7a572af0b45f558b5e502121a768dc328ba8 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Fri, 3 Oct 2025 13:14:55 -0700 Subject: [PATCH 0087/1075] mm/damon/sysfs: dealloc commit test ctx always The damon_ctx for testing online DAMON parameters commit inputs is deallocated only when the test fails. This means memory is leaked for every successful online DAMON parameters commit. Fix the leak by always deallocating it. Link: https://lkml.kernel.org/r/20251003201455.41448-3-sj@kernel.org Fixes: 4c9ea539ad59 ("mm/damon/sysfs: validate user inputs from damon_sysfs_commit_input()") Signed-off-by: SeongJae Park Cc: [6.15+] Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 703f55a91b3c..cd6815ecc04e 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1476,12 +1476,11 @@ static int damon_sysfs_commit_input(void *data) if (!test_ctx) return -ENOMEM; err = damon_commit_ctx(test_ctx, param_ctx); - if (err) { - damon_destroy_ctx(test_ctx); + if (err) goto out; - } err = damon_commit_ctx(kdamond->damon_ctx, param_ctx); out: + damon_destroy_ctx(test_ctx); damon_destroy_ctx(param_ctx); return err; } From 03521c892bb8d0712c23e158ae9bdf8705897df8 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 9 Oct 2025 16:15:08 +0200 Subject: [PATCH 0088/1075] dma-debug: don't report false positives with DMA_BOUNCE_UNALIGNED_KMALLOC Commit 370645f41e6e ("dma-mapping: force bouncing if the kmalloc() size is not cache-line-aligned") introduced DMA_BOUNCE_UNALIGNED_KMALLOC feature and permitted architecture specific code configure kmalloc slabs with sizes smaller than the value of dma_get_cache_alignment(). When that feature is enabled, the physical address of some small kmalloc()-ed buffers might be not aligned to the CPU cachelines, thus not really suitable for typical DMA. To properly handle that case a SWIOTLB buffer bouncing is used, so no CPU cache corruption occurs. When that happens, there is no point reporting a false-positive DMA-API warning that the buffer is not properly aligned, as this is not a client driver fault. [m.szyprowski@samsung.com: replace is_swiotlb_allocated() with is_swiotlb_active(), per Catalin] Link: https://lkml.kernel.org/r/20251010173009.3916215-1-m.szyprowski@samsung.com Link: https://lkml.kernel.org/r/20251009141508.2342138-1-m.szyprowski@samsung.com Fixes: 370645f41e6e ("dma-mapping: force bouncing if the kmalloc() size is not cache-line-aligned") Signed-off-by: Marek Szyprowski Reviewed-by: Catalin Marinas Cc: Christoph Hellwig Cc: Inki Dae Cc: Robin Murohy Cc: "Isaac J. Manjarres" Cc: Signed-off-by: Andrew Morton --- kernel/dma/debug.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 1e5c64cb6a42..138ede653de4 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "debug.h" @@ -594,7 +595,9 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs) if (rc == -ENOMEM) { pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n"); global_disable = true; - } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) { + } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !(IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && + is_swiotlb_active(entry->dev))) { err_printk(entry->dev, entry, "cacheline tracking EEXIST, overlapping mappings aren't supported\n"); } From c83aab85e18103a6dc066b4939e2c92a02bb1b05 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 9 Oct 2025 17:15:13 -0700 Subject: [PATCH 0089/1075] mm: don't spin in add_stack_record when gfp flags don't allow syzbot was able to find the following path: add_stack_record_to_list mm/page_owner.c:182 [inline] inc_stack_record_count mm/page_owner.c:214 [inline] __set_page_owner+0x2c3/0x4a0 mm/page_owner.c:333 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x240/0x2a0 mm/page_alloc.c:1851 prep_new_page mm/page_alloc.c:1859 [inline] get_page_from_freelist+0x21e4/0x22c0 mm/page_alloc.c:3858 alloc_pages_nolock_noprof+0x94/0x120 mm/page_alloc.c:7554 Don't spin in add_stack_record_to_list() when it is called from *_nolock() context. Link: https://lkml.kernel.org/r/CAADnVQK_8bNYEA7TJYgwTYR57=TTFagsvRxp62pFzS_z129eTg@mail.gmail.com Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") Signed-off-by: Alexei Starovoitov Reported-by: syzbot+8259e1d0e3ae8ed0c490@syzkaller.appspotmail.com Reported-by: syzbot+665739f456b28f32b23d@syzkaller.appspotmail.com Acked-by: Vlastimil Babka Reviewed-by: Oscar Salvador Cc: Brendan Jackman Cc: Johannes Weiner Cc: Michal Hocko Cc: Suren Baghdasaryan Cc: Zi Yan Cc: Signed-off-by: Andrew Morton --- mm/page_owner.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_owner.c b/mm/page_owner.c index c3ca21132c2c..589ec37c94aa 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -168,6 +168,9 @@ static void add_stack_record_to_list(struct stack_record *stack_record, unsigned long flags; struct stack *stack; + if (!gfpflags_allow_spinning(gfp_mask)) + return; + set_current_in_page_owner(); stack = kmalloc(sizeof(*stack), gfp_nested_mask(gfp_mask)); if (!stack) { From 78a63493f8e352296dbc7cb7b3f4973105e8679e Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Thu, 9 Oct 2025 21:19:03 +0530 Subject: [PATCH 0090/1075] ocfs2: clear extent cache after moving/defragmenting extents The extent map cache can become stale when extents are moved or defragmented, causing subsequent operations to see outdated extent flags. This triggers a BUG_ON in ocfs2_refcount_cal_cow_clusters(). The problem occurs when: 1. copy_file_range() creates a reflinked extent with OCFS2_EXT_REFCOUNTED 2. ioctl(FITRIM) triggers ocfs2_move_extents() 3. __ocfs2_move_extents_range() reads and caches the extent (flags=0x2) 4. ocfs2_move_extent()/ocfs2_defrag_extent() calls __ocfs2_move_extent() which clears OCFS2_EXT_REFCOUNTED flag on disk (flags=0x0) 5. The extent map cache is not invalidated after the move 6. Later write() operations read stale cached flags (0x2) but disk has updated flags (0x0), causing a mismatch 7. BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) triggers Fix by clearing the extent map cache after each extent move/defrag operation in __ocfs2_move_extents_range(). This ensures subsequent operations read fresh extent data from disk. Link: https://lore.kernel.org/all/20251009142917.517229-1-kartikey406@gmail.com/T/ Link: https://lkml.kernel.org/r/20251009154903.522339-1-kartikey406@gmail.com Fixes: 53069d4e7695 ("Ocfs2/move_extents: move/defrag extents within a certain range.") Signed-off-by: Deepanshu Kartikey Reported-by: syzbot+6fdd8fa3380730a4b22c@syzkaller.appspotmail.com Tested-by: syzbot+6fdd8fa3380730a4b22c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?id=2959889e1f6e216585ce522f7e8bc002b46ad9e7 Reviewed-by: Mark Fasheh Reviewed-by: Joseph Qi Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Jun Piao Cc: Signed-off-by: Andrew Morton --- fs/ocfs2/move_extents.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 86f2631e6360..10923bf7c8b8 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -867,6 +867,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, mlog_errno(ret); goto out; } + /* + * Invalidate extent cache after moving/defragging to prevent + * stale cached data with outdated extent flags. + */ + ocfs2_extent_map_trunc(inode, cpos); context->clusters_moved += alloc_size; next: From 841a8bfcbad94bb1ba60f59ce34f75259074ae0d Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Sat, 11 Oct 2025 15:55:19 +0800 Subject: [PATCH 0091/1075] mm: prevent poison consumption when splitting THP When performing memory error injection on a THP (Transparent Huge Page) mapped to userspace on an x86 server, the kernel panics with the following trace. The expected behavior is to terminate the affected process instead of panicking the kernel, as the x86 Machine Check code can recover from an in-userspace #MC. mce: [Hardware Error]: CPU 0: Machine Check Exception: f Bank 3: bd80000000070134 mce: [Hardware Error]: RIP 10: {memchr_inv+0x4c/0xf0} mce: [Hardware Error]: TSC afff7bbff88a ADDR 1d301b000 MISC 80 PPIN 1e741e77539027db mce: [Hardware Error]: PROCESSOR 0:d06d0 TIME 1758093249 SOCKET 0 APIC 0 microcode 80000320 mce: [Hardware Error]: Run the above through 'mcelog --ascii' mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel Kernel panic - not syncing: Fatal local machine check The root cause of this panic is that handling a memory failure triggered by an in-userspace #MC necessitates splitting the THP. The splitting process employs a mechanism, implemented in try_to_map_unused_to_zeropage(), which reads the pages in the THP to identify zero-filled pages. However, reading the pages in the THP results in a second in-kernel #MC, occurring before the initial memory_failure() completes, ultimately leading to a kernel panic. See the kernel panic call trace on the two #MCs. First Machine Check occurs // [1] memory_failure() // [2] try_to_split_thp_page() split_huge_page() split_huge_page_to_list_to_order() __folio_split() // [3] remap_page() remove_migration_ptes() remove_migration_pte() try_to_map_unused_to_zeropage() // [4] memchr_inv() // [5] Second Machine Check occurs // [6] Kernel panic [1] Triggered by accessing a hardware-poisoned THP in userspace, which is typically recoverable by terminating the affected process. [2] Call folio_set_has_hwpoisoned() before try_to_split_thp_page(). [3] Pass the RMP_USE_SHARED_ZEROPAGE remap flag to remap_page(). [4] Try to map the unused THP to zeropage. [5] Re-access pages in the hw-poisoned THP in the kernel. [6] Triggered in-kernel, leading to a panic kernel. In Step[2], memory_failure() sets the poisoned flag on the page in the THP by TestSetPageHWPoison() before calling try_to_split_thp_page(). As suggested by David Hildenbrand, fix this panic by not accessing to the poisoned page in the THP during zeropage identification, while continuing to scan unaffected pages in the THP for possible zeropage mapping. This prevents a second in-kernel #MC that would cause kernel panic in Step[4]. Thanks to Andrew Zaborowski for his initial work on fixing this issue. Link: https://lkml.kernel.org/r/20251015064926.1887643-1-qiuxu.zhuo@intel.com Link: https://lkml.kernel.org/r/20251011075520.320862-1-qiuxu.zhuo@intel.com Fixes: b1f202060afe ("mm: remap unused subpages to shared zeropage when splitting isolated thp") Signed-off-by: Qiuxu Zhuo Reported-by: Farrah Chen Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Tested-by: Farrah Chen Tested-by: Qiuxu Zhuo Acked-by: Lance Yang Reviewed-by: Wei Yang Acked-by: Zi Yan Reviewed-by: Miaohe Lin Cc: Barry Song Cc: Dev Jain Cc: Jiaqi Yan Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: "Luck, Tony" Cc: Mariano Pache Cc: Miaohe Lin Cc: Naoya Horiguchi Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 3 +++ mm/migrate.c | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1b81680b4225..1d1b74950332 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -4109,6 +4109,9 @@ static bool thp_underused(struct folio *folio) if (khugepaged_max_ptes_none == HPAGE_PMD_NR - 1) return false; + if (folio_contain_hwpoisoned_page(folio)) + return false; + for (i = 0; i < folio_nr_pages(folio); i++) { if (pages_identical(folio_page(folio, i), ZERO_PAGE(0))) { if (++num_zero_pages > khugepaged_max_ptes_none) diff --git a/mm/migrate.c b/mm/migrate.c index e3065c9edb55..c0e9f15be2a2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -301,8 +301,9 @@ static bool try_to_map_unused_to_zeropage(struct page_vma_mapped_walk *pvmw, struct page *page = folio_page(folio, idx); pte_t newpte; - if (PageCompound(page)) + if (PageCompound(page) || PageHWPoison(page)) return false; + VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(pte_present(old_pte), page); From ea138a607709bf72c162f62d2a670fe899d73daa Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 17:17:09 -0700 Subject: [PATCH 0092/1075] RISC-V: KVM: Fix check for local interrupts on riscv32 To set all 64 bits in the mask on a 32-bit system, the constant must have type `unsigned long long`. Fixes: 6b1e8ba4bac4 ("RISC-V: KVM: Use bitmap for irqs_pending and irqs_pending_mask") Signed-off-by: Samuel Holland Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20251016001714.3889380-1-samuel.holland@sifive.com Signed-off-by: Anup Patel --- arch/riscv/kvm/vcpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index bccb919ca615..5ce35aba6069 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -212,7 +212,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } From 4314ffce4eb81a6c18700af1b6e29b6e0c6b9e37 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:52 +0300 Subject: [PATCH 0093/1075] spi: airoha: return an error for continuous mode dirmap creation cases This driver can accelerate single page operations only, thus continuous reading mode should not be used. Continuous reading will use sizes up to the size of one erase block. This size is much larger than the size of single flash page. Use this difference to identify continuous reading and return an error. Signed-off-by: Mikhail Kshevetskiy Reviewed-by: Frieder Schrempf Reviewed-by: AngeloGioacchino Del Regno Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Link: https://patch.msgid.link/20251012121707.2296160-2-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index dbe640986825..043a03cd90a1 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -618,6 +618,10 @@ static int airoha_snand_dirmap_create(struct spi_mem_dirmap_desc *desc) if (desc->info.offset + desc->info.length > U32_MAX) return -EINVAL; + /* continuous reading is not supported */ + if (desc->info.length > SPI_NAND_CACHE_SIZE) + return -E2BIG; + if (!airoha_snand_supports_op(desc->mem, &desc->info.op_tmpl)) return -EOPNOTSUPP; From edd2e261b1babb92213089b5feadca12e3459322 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:54 +0300 Subject: [PATCH 0094/1075] spi: airoha: add support of dual/quad wires spi modes to exec_op() handler Booting without this patch and disabled dirmap support results in [ 2.980719] spi-nand spi0.0: Micron SPI NAND was found. [ 2.986040] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 2.994709] 2 fixed-partitions partitions found on MTD device spi0.0 [ 3.001075] Creating 2 MTD partitions on "spi0.0": [ 3.005862] 0x000000000000-0x000000020000 : "bl2" [ 3.011272] 0x000000020000-0x000010000000 : "ubi" ... [ 6.195594] ubi0: attaching mtd1 [ 13.338398] ubi0: scanning is finished [ 13.342188] ubi0 error: ubi_read_volume_table: the layout volume was not found [ 13.349784] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1, error -22 [ 13.356897] UBI error: cannot attach mtd1 If dirmap is disabled or not supported in the spi driver, the dirmap requests will be executed via exec_op() handler. Thus, if the hardware supports dual/quad spi modes, then corresponding requests will be sent to exec_op() handler. Current driver does not support such requests, so error is arrised. As result the flash can't be read/write. This patch adds support of dual and quad wires spi modes to exec_op() handler. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-4-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 108 ++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 26 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 043a03cd90a1..0b89dc42545b 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -192,6 +192,14 @@ #define SPI_NAND_OP_RESET 0xff #define SPI_NAND_OP_DIE_SELECT 0xc2 +/* SNAND FIFO commands */ +#define SNAND_FIFO_TX_BUSWIDTH_SINGLE 0x08 +#define SNAND_FIFO_TX_BUSWIDTH_DUAL 0x09 +#define SNAND_FIFO_TX_BUSWIDTH_QUAD 0x0a +#define SNAND_FIFO_RX_BUSWIDTH_SINGLE 0x0c +#define SNAND_FIFO_RX_BUSWIDTH_DUAL 0x0e +#define SNAND_FIFO_RX_BUSWIDTH_QUAD 0x0f + #define SPI_NAND_CACHE_SIZE (SZ_4K + SZ_256) #define SPI_MAX_TRANSFER_SIZE 511 @@ -387,10 +395,26 @@ static int airoha_snand_set_mode(struct airoha_snand_ctrl *as_ctrl, return regmap_write(as_ctrl->regmap_ctrl, REG_SPI_CTRL_DUMMY, 0); } -static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, - const u8 *data, int len) +static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, + const u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_TX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_TX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_TX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; @@ -409,16 +433,32 @@ static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, return 0; } -static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, u8 *data, - int len) +static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, + u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_RX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_RX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_RX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; data_len = min(len - i, SPI_MAX_TRANSFER_SIZE); - err = airoha_snand_set_fifo_op(as_ctrl, 0xc, data_len); + err = airoha_snand_set_fifo_op(as_ctrl, cmd, data_len); if (err) return err; @@ -902,12 +942,28 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, static int airoha_snand_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) { - u8 data[8], cmd, opcode = op->cmd.opcode; struct airoha_snand_ctrl *as_ctrl; + int op_len, addr_len, dummy_len; + u8 buf[20], *data; int i, err; as_ctrl = spi_controller_get_devdata(mem->spi->controller); + op_len = op->cmd.nbytes; + addr_len = op->addr.nbytes; + dummy_len = op->dummy.nbytes; + + if (op_len + dummy_len + addr_len > sizeof(buf)) + return -EIO; + + data = buf; + for (i = 0; i < op_len; i++) + *data++ = op->cmd.opcode >> (8 * (op_len - i - 1)); + for (i = 0; i < addr_len; i++) + *data++ = op->addr.val >> (8 * (addr_len - i - 1)); + for (i = 0; i < dummy_len; i++) + *data++ = 0xff; + /* switch to manual mode */ err = airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); if (err < 0) @@ -918,40 +974,40 @@ static int airoha_snand_exec_op(struct spi_mem *mem, return err; /* opcode */ - err = airoha_snand_write_data(as_ctrl, 0x8, &opcode, sizeof(opcode)); + data = buf; + err = airoha_snand_write_data(as_ctrl, data, op_len, + op->cmd.buswidth); if (err) return err; /* addr part */ - cmd = opcode == SPI_NAND_OP_GET_FEATURE ? 0x11 : 0x8; - put_unaligned_be64(op->addr.val, data); - - for (i = ARRAY_SIZE(data) - op->addr.nbytes; - i < ARRAY_SIZE(data); i++) { - err = airoha_snand_write_data(as_ctrl, cmd, &data[i], - sizeof(data[0])); + data += op_len; + if (addr_len) { + err = airoha_snand_write_data(as_ctrl, data, addr_len, + op->addr.buswidth); if (err) return err; } /* dummy */ - data[0] = 0xff; - for (i = 0; i < op->dummy.nbytes; i++) { - err = airoha_snand_write_data(as_ctrl, 0x8, &data[0], - sizeof(data[0])); + data += addr_len; + if (dummy_len) { + err = airoha_snand_write_data(as_ctrl, data, dummy_len, + op->dummy.buswidth); if (err) return err; } /* data */ - if (op->data.dir == SPI_MEM_DATA_IN) { - err = airoha_snand_read_data(as_ctrl, op->data.buf.in, - op->data.nbytes); - if (err) - return err; - } else { - err = airoha_snand_write_data(as_ctrl, 0x8, op->data.buf.out, - op->data.nbytes); + if (op->data.nbytes) { + if (op->data.dir == SPI_MEM_DATA_IN) + err = airoha_snand_read_data(as_ctrl, op->data.buf.in, + op->data.nbytes, + op->data.buswidth); + else + err = airoha_snand_write_data(as_ctrl, op->data.buf.out, + op->data.nbytes, + op->data.buswidth); if (err) return err; } From 20d7b236b78c7ec685a22db5689b9c829975e0c3 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:56 +0300 Subject: [PATCH 0095/1075] spi: airoha: switch back to non-dma mode in the case of error Current dirmap code does not switch back to non-dma mode in the case of error. This is wrong. This patch fixes dirmap read/write error path. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Acked-by: Lorenzo Bianconi Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-6-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 0b89dc42545b..8143fbb0cf4e 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -698,13 +698,13 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, err = airoha_snand_nfi_config(as_ctrl); if (err) - return err; + goto error_dma_mode_off; dma_addr = dma_map_single(as_ctrl->dev, txrx_buf, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); err = dma_mapping_error(as_ctrl->dev, dma_addr); if (err) - return err; + goto error_dma_mode_off; /* set dma addr */ err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_STRADDR, @@ -804,6 +804,8 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); +error_dma_mode_off: + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } @@ -936,6 +938,7 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_TO_DEVICE); + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } From 0b7d9b25e4bc2e478c9d06281a65f930769fca09 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:57 +0300 Subject: [PATCH 0096/1075] spi: airoha: fix reading/writing of flashes with more than one plane per lun Attaching UBI on the flash with more than one plane per lun will lead to the following error: [ 2.980989] spi-nand spi0.0: Micron SPI NAND was found. [ 2.986309] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 2.994978] 2 fixed-partitions partitions found on MTD device spi0.0 [ 3.001350] Creating 2 MTD partitions on "spi0.0": [ 3.006159] 0x000000000000-0x000000020000 : "bl2" [ 3.011663] 0x000000020000-0x000010000000 : "ubi" ... [ 6.391748] ubi0: attaching mtd1 [ 6.412545] ubi0 error: ubi_attach: PEB 0 contains corrupted VID header, and the data does not contain all 0xFF [ 6.422677] ubi0 error: ubi_attach: this may be a non-UBI PEB or a severe VID header corruption which requires manual inspection [ 6.434249] Volume identifier header dump: [ 6.438349] magic 55424923 [ 6.441482] version 1 [ 6.444007] vol_type 0 [ 6.446539] copy_flag 0 [ 6.449068] compat 0 [ 6.451594] vol_id 0 [ 6.454120] lnum 1 [ 6.456651] data_size 4096 [ 6.459442] used_ebs 1061644134 [ 6.462748] data_pad 0 [ 6.465274] sqnum 0 [ 6.467805] hdr_crc 61169820 [ 6.470943] Volume identifier header hexdump: [ 6.475308] hexdump of PEB 0 offset 4096, length 126976 [ 6.507391] ubi0 warning: ubi_attach: valid VID header but corrupted EC header at PEB 4 [ 6.515415] ubi0 error: ubi_compare_lebs: unsupported on-flash UBI format [ 6.522222] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1, error -22 [ 6.529294] UBI error: cannot attach mtd1 Non dirmap reading works good. Looking to spi_mem_no_dirmap_read() code we'll see: static ssize_t spi_mem_no_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs, size_t len, void *buf) { struct spi_mem_op op = desc->info.op_tmpl; int ret; // --- see here --- op.addr.val = desc->info.offset + offs; //----------------- op.data.buf.in = buf; op.data.nbytes = len; ret = spi_mem_adjust_op_size(desc->mem, &op); if (ret) return ret; ret = spi_mem_exec_op(desc->mem, &op); if (ret) return ret; return op.data.nbytes; } The similar happens for spi_mem_no_dirmap_write(). Thus the address passed to the flash should take in the account the value of desc->info.offset. This patch fix dirmap reading/writing of flashes with more than one plane per lun. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-7-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 8143fbb0cf4e..b78163eaed61 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -733,8 +733,9 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - /* set read addr */ - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, 0x0); + /* set read addr: zero page offset + descriptor read offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, + desc->info.offset); if (err) goto error_dma_unmap; @@ -870,7 +871,9 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, 0x0); + /* set write addr: zero page offset + descriptor write offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, + desc->info.offset); if (err) goto error_dma_unmap; From 830d68f2cb8ab6fb798bb9555016709a9e012af0 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 6 Oct 2025 08:35:41 -0700 Subject: [PATCH 0097/1075] drm/msm: Fix pgtable prealloc error path The following splat was reported: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000010 Mem abort info: ESR = 0x0000000096000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x04: level 0 translation fault Data abort info: ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 CM = 0, WnR = 0, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=00000008d0fd8000 [0000000000000010] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 0000000096000004 [#1] SMP CPU: 5 UID: 1000 PID: 149076 Comm: Xwayland Tainted: G S 6.16.0-rc2-00809-g0b6974bb4134-dirty #367 PREEMPT Tainted: [S]=CPU_OUT_OF_SPEC Hardware name: Qualcomm Technologies, Inc. SM8650 HDK (DT) pstate: 83400005 (Nzcv daif +PAN -UAO +TCO +DIT -SSBS BTYPE=--) pc : build_detached_freelist+0x28/0x224 lr : kmem_cache_free_bulk.part.0+0x38/0x244 sp : ffff000a508c7a20 x29: ffff000a508c7a20 x28: ffff000a508c7d50 x27: ffffc4e49d16f350 x26: 0000000000000058 x25: 00000000fffffffc x24: 0000000000000000 x23: ffff00098c4e1450 x22: 00000000fffffffc x21: 0000000000000000 x20: ffff000a508c7af8 x19: 0000000000000002 x18: 00000000000003e8 x17: ffff000809523850 x16: ffff000809523820 x15: 0000000000401640 x14: ffff000809371140 x13: 0000000000000130 x12: ffff0008b5711e30 x11: 00000000001058fa x10: 0000000000000a80 x9 : ffff000a508c7940 x8 : ffff000809371ba0 x7 : 781fffe033087fff x6 : 0000000000000000 x5 : ffff0008003cd000 x4 : 781fffe033083fff x3 : ffff000a508c7af8 x2 : fffffdffc0000000 x1 : 0001000000000000 x0 : ffff0008001a6a00 Call trace: build_detached_freelist+0x28/0x224 (P) kmem_cache_free_bulk.part.0+0x38/0x244 kmem_cache_free_bulk+0x10/0x1c msm_iommu_pagetable_prealloc_cleanup+0x3c/0xd0 msm_vma_job_free+0x30/0x240 msm_ioctl_vm_bind+0x1d0/0x9a0 drm_ioctl_kernel+0x84/0x104 drm_ioctl+0x358/0x4d4 __arm64_sys_ioctl+0x8c/0xe0 invoke_syscall+0x44/0x100 el0_svc_common.constprop.0+0x3c/0xe0 do_el0_svc+0x18/0x20 el0_svc+0x30/0x100 el0t_64_sync_handler+0x104/0x130 el0t_64_sync+0x170/0x174 Code: aa0203f5 b26287e2 f2dfbfe2 aa0303f4 (f8737ab6) ---[ end trace 0000000000000000 ]--- Since msm_vma_job_free() is called directly from the ioctl, this looks like an error path cleanup issue. Which I think results from prealloc_cleanup() called without a preceding successful prealloc_allocate() call. So handle that case better. Reported-by: Connor Abbott Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/678677/ Message-ID: <20251006153542.419998-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/msm_iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 0e18619f96cb..a188617653e8 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -338,6 +338,8 @@ msm_iommu_pagetable_prealloc_allocate(struct msm_mmu *mmu, struct msm_mmu_preall ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, p->count, p->pages); if (ret != p->count) { + kfree(p->pages); + p->pages = NULL; p->count = ret; return -ENOMEM; } @@ -351,6 +353,9 @@ msm_iommu_pagetable_prealloc_cleanup(struct msm_mmu *mmu, struct msm_mmu_preallo struct kmem_cache *pt_cache = get_pt_cache(mmu); uint32_t remaining_pt_count = p->count - p->ptr; + if (!p->pages) + return; + if (p->count > 0) trace_msm_mmu_prealloc_cleanup(p->count, remaining_pt_count); From b4789aac9d3441d9f830f0a4022d8dc122d6cab3 Mon Sep 17 00:00:00 2001 From: Akhil P Oommen Date: Thu, 11 Sep 2025 02:14:05 +0530 Subject: [PATCH 0098/1075] drm/msm/a6xx: Fix GMU firmware parser Current parser logic for GMU firmware assumes a dword aligned payload size for every block. This is not true for all GMU firmwares. So, fix this by using correct 'size' value in the calculation for the offset for the next block's header. Fixes: c6ed04f856a4 ("drm/msm/a6xx: A640/A650 GMU firmware path") Signed-off-by: Akhil P Oommen Acked-by: Konrad Dybcio Patchwork: https://patchwork.freedesktop.org/patch/674040/ Message-ID: <20250911-assorted-sept-1-v2-2-a8bf1ee20792@oss.qualcomm.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index fc62fef2fed8..4e6dc16e4a4c 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -780,6 +780,9 @@ static bool fw_block_mem(struct a6xx_gmu_bo *bo, const struct block_header *blk) return true; } +#define NEXT_BLK(blk) \ + ((const struct block_header *)((const char *)(blk) + sizeof(*(blk)) + (blk)->size)) + static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) { struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu); @@ -811,7 +814,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) for (blk = (const struct block_header *) fw_image->data; (const u8*) blk < fw_image->data + fw_image->size; - blk = (const struct block_header *) &blk->data[blk->size >> 2]) { + blk = NEXT_BLK(blk)) { if (blk->size == 0) continue; From 86404a9e3013d814a772ac407573be5d3cd4ee0d Mon Sep 17 00:00:00 2001 From: Anna Maniscalco Date: Sat, 11 Oct 2025 15:45:10 +0200 Subject: [PATCH 0099/1075] drm/msm: make sure last_fence is always updated Update last_fence in the vm-bind path instead of kernel managed path. last_fence is used to wait for work to finish in vm_bind contexts but not used for kernel managed contexts. This fixes a bug where last_fence is not waited on context close leading to faults as resources are freed while in use. Fixes: 92395af63a99 ("drm/msm: Add VM_BIND submitqueue") Signed-off-by: Anna Maniscalco Patchwork: https://patchwork.freedesktop.org/patch/680080/ Message-ID: <20251011-close_fence_wait_fix-v3-1-5134787755ff@gmail.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem_submit.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3ab3b27134f9..75d9f3574370 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -414,6 +414,11 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) submit->user_fence, DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP); + + last_fence = vm->last_fence; + vm->last_fence = dma_fence_unwrap_merge(submit->user_fence, last_fence); + dma_fence_put(last_fence); + return; } @@ -427,10 +432,6 @@ static void submit_attach_object_fences(struct msm_gem_submit *submit) dma_resv_add_fence(obj->resv, submit->user_fence, DMA_RESV_USAGE_READ); } - - last_fence = vm->last_fence; - vm->last_fence = dma_fence_unwrap_merge(submit->user_fence, last_fence); - dma_fence_put(last_fence); } static int submit_bo(struct msm_gem_submit *submit, uint32_t idx, From 7f9335f2d99815199d9d8d8e6687e68989728ce3 Mon Sep 17 00:00:00 2001 From: Vignesh Raman Date: Mon, 13 Oct 2025 11:32:10 +0530 Subject: [PATCH 0100/1075] drm/ci: disable broken MR check in sanity job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitLab recently changed the required permissions for the are-developers-allowed-to-push-to-my-MR check: https://gitlab.freedesktop.org/freedesktop/ci-templates/-/issues/81 Until that’s resolved, disable the check - it’s mostly obsolete anyway. Based on https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37782 Signed-off-by: Vignesh Raman Reviewed-by: Daniel Stone Patchwork: https://patchwork.freedesktop.org/patch/680263/ Message-ID: <20251013060212.14583-1-vignesh.raman@collabora.com> Signed-off-by: Rob Clark --- drivers/gpu/drm/ci/gitlab-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ci/gitlab-ci.yml b/drivers/gpu/drm/ci/gitlab-ci.yml index d502d146b177..56638814bb28 100644 --- a/drivers/gpu/drm/ci/gitlab-ci.yml +++ b/drivers/gpu/drm/ci/gitlab-ci.yml @@ -280,7 +280,7 @@ sanity: GIT_STRATEGY: none script: # ci-fairy check-commits --junit-xml=check-commits.xml - - ci-fairy check-merge-request --require-allow-collaboration --junit-xml=check-merge-request.xml + # - ci-fairy check-merge-request --require-allow-collaboration --junit-xml=check-merge-request.xml - | set -eu image_tags=( From c700e7279b29948f5d2aee30df2dbd3124df3b9c Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 10 Oct 2025 10:31:41 -0700 Subject: [PATCH 0101/1075] drm/rockchip: dw_hdmi: use correct SCLIN mask for RK3228 In dw_hdmi_rk3228_setup_hpd(), the SCLIN mask incorrectly references the RK3328 variant. This change updates it to the RK3228-specific mask RK3228_HDMI_SCLIN_MSK using FIELD_PREP_WM16, ensuring proper HPD and I2C pin configuration for RK3228. Change: RK3328_HDMI_SCLIN_MSK -> RK3228_HDMI_SCLIN_MSK Fixes: 63df37f3fc71 ("drm/rockchip: dw_hdmi: switch to FIELD_PREP_WM16* macros") Signed-off-by: Alok Tiwari Reviewed-by: Nicolas Frattaroli Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20251010173143.72733-1-alok.a.tiwari@oracle.com --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 7b613997bb50..727cdf768161 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -361,7 +361,7 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2, FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) | - FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1)); + FIELD_PREP_WM16(RK3228_HDMI_SCLIN_MSK, 1)); } static enum drm_connector_status From a3c4a0a42e61aad1056a3d33fd603c1ae66d4288 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Thu, 16 Oct 2025 11:11:26 -0700 Subject: [PATCH 0102/1075] sched_ext: fix flag check for deferred callbacks When scheduling the deferred balance callbacks, check SCX_RQ_BAL_CB_PENDING instead of SCX_RQ_BAL_PENDING. This way schedule_deferred() properly tests whether there is already a pending request for queue_balance_callback() to be invoked at the end of .balance(). Fixes: a8ad873113d3 ("sched_ext: defer queue_balance_callback() until after ops.dispatch") Signed-off-by: Emil Tsalapatis Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 12c9c3595692..ecb251e883ea 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -792,7 +792,7 @@ static void schedule_deferred(struct rq *rq) return; /* Don't do anything if there already is a deferred operation. */ - if (rq->scx.flags & SCX_RQ_BAL_PENDING) + if (rq->scx.flags & SCX_RQ_BAL_CB_PENDING) return; /* From 75cea9860aa6b2350d90a8d78fed114d27c7eca2 Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 14 Oct 2025 20:35:28 +0200 Subject: [PATCH 0103/1075] net: usb: rtl8150: Fix frame padding TX frames aren't padded and unknown memory is sent into the ether. Theoretically, it isn't even guaranteed that the extra memory exists and can be sent out, which could cause further problems. In practice, I found that plenty of tailroom exists in the skb itself (in my test with ping at least) and skb_padto() easily succeeds, so use it here. In the event of -ENOMEM drop the frame like other drivers do. The use of one more padding byte instead of a USB zero-length packet is retained to avoid regression. I have a dodgy Etron xHCI controller which doesn't seem to support sending ZLPs at all. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Michal Pecio Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251014203528.3f9783c4.michal.pecio@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/rtl8150.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index 92add3daadbb..278e6cb6f4d9 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -685,9 +685,16 @@ static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, rtl8150_t *dev = netdev_priv(netdev); int count, res; + /* pad the frame and ensure terminating USB packet, datasheet 9.2.3 */ + count = max(skb->len, ETH_ZLEN); + if (count % 64 == 0) + count++; + if (skb_padto(skb, count)) { + netdev->stats.tx_dropped++; + return NETDEV_TX_OK; + } + netif_stop_queue(netdev); - count = (skb->len < 60) ? 60 : skb->len; - count = (count & 0x3f) ? count : count + 1; dev->tx_skb = skb; usb_fill_bulk_urb(dev->tx_urb, dev->udev, usb_sndbulkpipe(dev->udev, 2), skb->data, count, write_bulk_callback, dev); From aaf043a5688114703ae2c1482b92e7e0754d684e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 14 Oct 2025 13:46:49 -0700 Subject: [PATCH 0104/1075] net/mlx5e: Return 1 instead of 0 in invalid case in mlx5e_mpwrq_umr_entry_size() When building with Clang 20 or newer, there are some objtool warnings from unexpected fallthroughs to other functions: vmlinux.o: warning: objtool: mlx5e_mpwrq_mtts_per_wqe() falls through to next function mlx5e_mpwrq_max_num_entries() vmlinux.o: warning: objtool: mlx5e_mpwrq_max_log_rq_size() falls through to next function mlx5e_get_linear_rq_headroom() LLVM 20 contains an (admittedly problematic [1]) optimization [2] to convert divide by zero into the equivalent of __builtin_unreachable(), which invokes undefined behavior and destroys code generation when it is encountered in a control flow graph. mlx5e_mpwrq_umr_entry_size() returns 0 in the default case of an unrecognized mlx5e_mpwrq_umr_mode value. mlx5e_mpwrq_mtts_per_wqe(), which is inlined into mlx5e_mpwrq_max_log_rq_size(), uses the result of mlx5e_mpwrq_umr_entry_size() in a divide operation without checking for zero, so LLVM is able to infer there will be a divide by zero in this case and invokes undefined behavior. While there is some proposed work to isolate this undefined behavior and avoid the destructive code generation that results in these objtool warnings, code should still be defensive against divide by zero. As the WARN_ONCE() implies that an invalid value should be handled gracefully, return 1 instead of 0 in the default case so that the results of this division operation is always valid. Fixes: 168723c1f8d6 ("net/mlx5e: xsk: Use umr_mode to calculate striding RQ parameters") Link: https://lore.kernel.org/CAGG=3QUk8-Ak7YKnRziO4=0z=1C_7+4jF+6ZeDQ9yF+kuTOHOQ@mail.gmail.com/ [1] Link: https://github.com/llvm/llvm-project/commit/37932643abab699e8bb1def08b7eb4eae7ff1448 [2] Closes: https://github.com/ClangBuiltLinux/linux/issues/2131 Closes: https://github.com/ClangBuiltLinux/linux/issues/2132 Signed-off-by: Nathan Chancellor Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20251014-mlx5e-avoid-zero-div-from-mlx5e_mpwrq_umr_entry_size-v1-1-dc186b8819ef@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3692298e10f2..c9bdee9a8b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -100,7 +100,7 @@ u8 mlx5e_mpwrq_umr_entry_size(enum mlx5e_mpwrq_umr_mode mode) return sizeof(struct mlx5_ksm) * 4; } WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", mode); - return 0; + return 1; } u8 mlx5e_mpwrq_log_wqe_sz(struct mlx5_core_dev *mdev, u8 page_shift, From d0d3e9c2867b32c9c70e39e74b9425871cf0042a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Oct 2025 06:32:21 +0000 Subject: [PATCH 0105/1075] net: gro: clear skb_shinfo(skb)->hwtstamps in napi_reuse_skb() Some network drivers assume this field is zero after napi_get_frags(). We must clear it in napi_reuse_skb() otherwise the following can happen: 1) A packet is received, and skb_shinfo(skb)->hwtstamps is populated because a bit in the receive descriptor announced hwtstamp availability for this packet. 2) Packet is given to gro layer via napi_gro_frags(). 3) Packet is merged to a prior one held in GRO queues. 4) skb is saved after some cleanup in napi->skb via a call to napi_reuse_skb(). 5) Next packet is received 10 seconds later, gets the recycled skb from napi_get_frags(). 6) The receive descriptor does not announce hwtstamp availability. Driver does not clear shinfo->hwtstamps. 7) We have in shinfo->hwtstamps an old timestamp. Fixes: ac45f602ee3d ("net: infrastructure for hardware time stamping") Signed-off-by: Eric Dumazet Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251015063221.4171986-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gro.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index 5ba4504cfd28..76f9c3712422 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -639,6 +639,8 @@ EXPORT_SYMBOL(gro_receive_skb); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { + struct skb_shared_info *shinfo; + if (unlikely(skb->pfmemalloc)) { consume_skb(skb); return; @@ -655,8 +657,12 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb->ip_summed = CHECKSUM_NONE; - skb_shinfo(skb)->gso_type = 0; - skb_shinfo(skb)->gso_size = 0; + + shinfo = skb_shinfo(skb); + shinfo->gso_type = 0; + shinfo->gso_size = 0; + shinfo->hwtstamps.hwtstamp = 0; + if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); From 5348d6312446929edabced02bd6438bfe5220e31 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Wed, 15 Oct 2025 10:05:23 +0300 Subject: [PATCH 0106/1075] net/mlx5e: psp, avoid 'accel' NULL pointer dereference The 'accel' parameter of mlx5e_txwqe_build_eseg_csum() and the similar 'state' parameter of mlx5e_accel_tx_ids_len() were NULL when called from mlx5i_sq_xmit() and were causing kernel panics from that context. Fix that by passing in a local empty mlx5e_accel_tx_state variable, thus guaranteeing that 'accel' is never NULL. Also remove an unnecessary check from mlx5e_tx_wqe_inline_mode(). Fixes: e5a1861a298e ("net/mlx5e: Implement PSP Tx data path") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/1760511923-890650-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b7227afcb51d..2702b3885f06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -256,7 +256,7 @@ mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 mode; #ifdef CONFIG_MLX5_EN_TLS - if (accel && accel->tls.tls_tisn) + if (accel->tls.tls_tisn) return MLX5_INLINE_MODE_TCP_UDP; #endif @@ -982,6 +982,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_attr attr; struct mlx5i_tx_wqe *wqe; + struct mlx5e_accel_tx_state accel = {}; struct mlx5_wqe_datagram_seg *datagram; struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_eth_seg *eseg; @@ -992,7 +993,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, int num_dma; u16 pi; - mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); + mlx5e_sq_xmit_prepare(sq, skb, &accel, &attr); mlx5i_sq_calc_wqe_attr(skb, &attr, &wqe_attr); pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); @@ -1009,7 +1010,7 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); - mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, eseg); + mlx5e_txwqe_build_eseg_csum(sq, skb, &accel, eseg); eseg->mss = attr.mss; From 1b0124ad5039678a9dfafb6aafef6f430a246b91 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Wed, 15 Oct 2025 16:25:41 +0700 Subject: [PATCH 0107/1075] net: rmnet: Fix checksum offload header v5 and aggregation packet formatting Packet format for checksum offload header v5 and aggregation, and header type table for the former, are shown in normal paragraphs instead. Use appropriate markup. Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251015092540.32282-2-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- .../device_drivers/cellular/qualcomm/rmnet.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst index 289c146a8291..6877a3260582 100644 --- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -137,16 +137,20 @@ d. Checksum offload header v5 Checksum offload header fields are in big endian format. +Packet format:: + Bit 0 - 6 7 8-15 16-31 Function Header Type Next Header Checksum Valid Reserved Header Type is to indicate the type of header, this usually is set to CHECKSUM Header types -= ========================================== + += =============== 0 Reserved 1 Reserved 2 checksum header += =============== Checksum Valid is to indicate whether the header checksum is valid. Value of 1 implies that checksum is calculated on this packet and is valid, value of 0 @@ -183,9 +187,11 @@ rmnet in a single linear skb. rmnet will process the individual packets and either ACK the MAP command or deliver the IP packet to the network stack as needed -MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... +Packet format:: -MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... + MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding.... + + MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad... 3. Userspace configuration ========================== From 4a9cb2eecc78fa9d388481762dd798fa770e1971 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:49 +0200 Subject: [PATCH 0108/1075] docs: rust: add section on imports formatting `rustfmt`, by default, formats imports in a way that is prone to conflicts while merging and rebasing, since in some cases it condenses several items into the same line. For instance, Linus mentioned [1] that the following case: use crate::{ fmt, page::AsPageIter, }; is compressed by `rustfmt` into: use crate::{fmt, page::AsPageIter}; which is undesirable. Similarly, `rustfmt` may put several items in the same line even if the braces span already multiple lines, e.g.: use kernel::{ acpi, c_str, device::{property, Core}, of, platform, }; The options that control the formatting behavior around imports are generally unstable, and `rustfmt` releases do not allow to use nightly features, unlike the compiler and other Rust tooling [2]. For the moment, we can introduce a workaround to prevent `rustfmt` from compressing the example above -- the "trailing empty comment": use crate::{ fmt, page::AsPageIter, // }; which is reminiscent of the trailing comma behavior in other formatters. We already used empty comments for formatting purposes in the past, e.g. in commit b9b701fce49a ("rust: clarify the language unstable features in use"). In addition, `rustfmt` actually reformats with a vertical layout (i.e. it does not put two items in the same line) when seeing such a comment, i.e. it doesn't just preserve the formatting, which is good in the sense that we can use it to easily reformat some imports, since it matches the style we generally want to have. A Git merge driver would help (suggested by Gary and Wedson), though maintainers would need to set it up, the diffs would still be larger and the formatting rules for imports would remain hard to predict. Thus document the style that we will follow in the coding guidelines by introducing a new section and explain how the trailing empty comment works there too. We discussed the issue with upstream Rust in our usual Rust <-> Rust for Linux meeting [3], and there have also been a few other discussions in parallel in issues [4][5] and Zulip [6]. We will see what happens, but upstream Rust has already created a subteam of `rustfmt` to try to overcome the bandwidth issue [7], which is a good signal, and some organization work has already started (e.g. tracking issues). We will continue our discussions with them about it. Cc: Caleb Cartwright Cc: Yacin Tmimi Cc: Manish Goregaokar Cc: Deadbeef Cc: Cameron Steffen Cc: Jieyou Xu Link: https://lore.kernel.org/all/CAHk-=wgO7S_FZUSBbngG5vtejWOpzDfTTBkVvP3_yjJmFddbzA@mail.gmail.com/ [1] Link: https://github.com/rust-lang/rustfmt/issues/4884 [2] Link: https://hackmd.io/iSCyY3JTTz-g8YM-nnzTTA [3] Link: https://github.com/rust-lang/rustfmt/issues/4991 [4] Link: https://github.com/rust-lang/rustfmt/issues/3361 [5] Link: https://rust-lang.zulipchat.com/#narrow/channel/392734-council/topic/rustfmt.20maintenance/near/543815381 [6] Link: https://github.com/rust-lang/team/pull/2017 [7] Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- Documentation/rust/coding-guidelines.rst | 75 ++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/Documentation/rust/coding-guidelines.rst b/Documentation/rust/coding-guidelines.rst index 6ff9e754755d..3198be3a6d63 100644 --- a/Documentation/rust/coding-guidelines.rst +++ b/Documentation/rust/coding-guidelines.rst @@ -38,6 +38,81 @@ Like ``clang-format`` for the rest of the kernel, ``rustfmt`` works on individual files, and does not require a kernel configuration. Sometimes it may even work with broken code. +Imports +~~~~~~~ + +``rustfmt``, by default, formats imports in a way that is prone to conflicts +while merging and rebasing, since in some cases it condenses several items into +the same line. For instance: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5}, + example6, example7, + example8::example9, + }; + +Instead, the kernel uses a vertical layout that looks like this: + +.. code-block:: rust + + use crate::{ + example1, + example2::{ + example3, + example4, + example5, // + }, + example6, + example7, + example8::example9, // + }; + +That is, each item goes into its own line, and braces are used as soon as there +is more than one item in a list. + +The trailing empty comment allows to preserve this formatting. Not only that, +``rustfmt`` will actually reformat imports vertically when the empty comment is +added. That is, it is possible to easily reformat the original example into the +expected style by running ``rustfmt`` on an input like: + +.. code-block:: rust + + // Do not use this style. + use crate::{ + example1, + example2::{example3, example4, example5, // + }, + example6, example7, + example8::example9, // + }; + +The trailing empty comment works for nested imports, as shown above, as well as +for single item imports -- this can be useful to minimize diffs within patch +series: + +.. code-block:: rust + + use crate::{ + example1, // + }; + +The trailing empty comment works in any of the lines within the braces, but it +is preferred to keep it in the last item, since it is reminiscent of the +trailing comma in other formatters. Sometimes it may be simpler to avoid moving +the comment several times within a patch series due to changes in the list. + +There may be cases where exceptions may need to be made, i.e. none of this is +a hard rule. There is also code that is not migrated to this style yet, but +please do not introduce code in other styles. + +Eventually, the goal is to get ``rustfmt`` to support this formatting style (or +a similar one) automatically in a stable release without requiring the trailing +empty comment. Thus, at some point, the goal is to remove those comments. + Comments -------- From 8a7c601e14576a22c2bbf7f67455ccf3f3d2737f Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:50 +0200 Subject: [PATCH 0109/1075] rust: alloc: employ a trailing comment to keep vertical layout Apply the formatting guidelines introduced in the previous commit to make the file `rustfmt`-clean again. Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/alloc/kvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/alloc/kvec.rs b/rust/kernel/alloc/kvec.rs index e94aebd084c8..ac8d6f763ae8 100644 --- a/rust/kernel/alloc/kvec.rs +++ b/rust/kernel/alloc/kvec.rs @@ -9,7 +9,7 @@ }; use crate::{ fmt, - page::AsPageIter, + page::AsPageIter, // }; use core::{ borrow::{Borrow, BorrowMut}, From 32f072d9eaf9c31c2b0527a4a3370570a731e3cc Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 10 Oct 2025 19:43:51 +0200 Subject: [PATCH 0110/1075] rust: cpufreq: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: f97aef092e19 ("cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency") Acked-by: Viresh Kumar Reviewed-by: Benno Lossin Signed-off-by: Miguel Ojeda --- rust/kernel/cpufreq.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/kernel/cpufreq.rs b/rust/kernel/cpufreq.rs index 21b5b9b8acc1..1a555fcb120a 100644 --- a/rust/kernel/cpufreq.rs +++ b/rust/kernel/cpufreq.rs @@ -38,8 +38,7 @@ const CPUFREQ_NAME_LEN: usize = bindings::CPUFREQ_NAME_LEN as usize; /// Default transition latency value in nanoseconds. -pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = - bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; +pub const DEFAULT_TRANSITION_LATENCY_NS: u32 = bindings::CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS; /// CPU frequency driver flags. pub mod flags { From bf29555f5bdc017bac22ca66fcb6c9f46ec8788f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Wiesb=C3=B6ck?= Date: Wed, 15 Oct 2025 22:15:43 +0200 Subject: [PATCH 0111/1075] rtnetlink: Allow deleting FDB entries in user namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating FDB entries is possible from a non-initial user namespace when having CAP_NET_ADMIN, yet, when deleting FDB entries, processes receive an EPERM because the capability is always checked against the initial user namespace. This restricts the FDB management from unprivileged containers. Drop the netlink_capable check in rtnl_fdb_del as it was originally dropped in c5c351088ae7 and reintroduced in 1690be63a27b without intention. This patch was tested using a container on GyroidOS, where it was possible to delete FDB entries from an unprivileged user namespace and private network namespace. Fixes: 1690be63a27b ("bridge: Add vlan support to static neighbors") Reviewed-by: Michael Weiß Tested-by: Harshal Gohel Signed-off-by: Johannes Wiesböck Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20251015201548.319871-1-johannes.wiesboeck@aisec.fraunhofer.de Signed-off-by: Jakub Kicinski --- net/core/rtnetlink.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 8040ff7c356e..576d5ec3bb36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4715,9 +4715,6 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, int err; u16 vid; - if (!netlink_capable(skb, CAP_NET_ADMIN)) - return -EPERM; - if (!del_bulk) { err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); From 7f864458e9a6d2000b726d14b3d3a706ac92a3b0 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 14 Oct 2025 17:49:34 +0200 Subject: [PATCH 0112/1075] net: stmmac: dwmac-rk: Fix disabling set_clock_selection On all platforms set_clock_selection() writes to a GRF register. This requires certain clocks running and thus should happen before the clocks are disabled. This has been noticed on RK3576 Sige5, which hangs during system suspend when trying to suspend the second network interface. Note, that suspending the first interface works, because the second device ensures that the necessary clocks for the GRF are enabled. Cc: stable@vger.kernel.org Fixes: 2f2b60a0ec28 ("net: ethernet: stmmac: dwmac-rk: Add gmac support for rk3588") Signed-off-by: Sebastian Reichel Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251014-rockchip-network-clock-fix-v1-1-c257b4afdf75@collabora.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 51ea0caf16c1..0786816e05f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1446,14 +1446,15 @@ static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable) } } else { if (bsp_priv->clk_enabled) { + if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) { + bsp_priv->ops->set_clock_selection(bsp_priv, + bsp_priv->clock_input, false); + } + clk_bulk_disable_unprepare(bsp_priv->num_clks, bsp_priv->clks); clk_disable_unprepare(bsp_priv->clk_phy); - if (bsp_priv->ops && bsp_priv->ops->set_clock_selection) - bsp_priv->ops->set_clock_selection(bsp_priv, - bsp_priv->clock_input, false); - bsp_priv->clk_enabled = false; } } From a429b76114aaca3ef1aff4cd469dcf025431bd11 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Sun, 12 Oct 2025 21:59:25 +0800 Subject: [PATCH 0113/1075] erofs: fix crafted invalid cases for encoded extents Robert recently reported two corrupted images that can cause system crashes, which are related to the new encoded extents introduced in Linux 6.15: - The first one [1] has plen != 0 (e.g. plen == 0x2000000) but (plen & Z_EROFS_EXTENT_PLEN_MASK) == 0. It is used to represent special extents such as sparse extents (!EROFS_MAP_MAPPED), but previously only plen == 0 was handled; - The second one [2] has pa 0xffffffffffdcffed and plen 0xb4000, then "cur [0xfffffffffffff000] += bvec.bv_len [0x1000]" in "} while ((cur += bvec.bv_len) < end);" wraps around, causing an out-of-bound access of pcl->compressed_bvecs[] in z_erofs_submit_queue(). EROFS only supports 48-bit physical block addresses (up to 1EiB for 4k blocks), so add a sanity check to enforce this. Fixes: 1d191b4ca51d ("erofs: implement encoded extent metadata") Reported-by: Robert Morris Closes: https://lore.kernel.org/r/75022.1759355830@localhost [1] Closes: https://lore.kernel.org/r/80524.1760131149@localhost [2] Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index e5581dbeb4c2..8007814f721e 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -596,7 +596,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode, vi->z_fragmentoff = map->m_plen; if (recsz > offsetof(struct z_erofs_extent, pstart_lo)) vi->z_fragmentoff |= map->m_pa << 32; - } else if (map->m_plen) { + } else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) { map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED; fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT; @@ -715,6 +715,7 @@ static int z_erofs_map_sanity_check(struct inode *inode, struct erofs_map_blocks *map) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); + u64 pend; if (!(map->m_flags & EROFS_MAP_ENCODED)) return 0; @@ -732,6 +733,10 @@ static int z_erofs_map_sanity_check(struct inode *inode, if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE || map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE)) return -EOPNOTSUPP; + /* Filesystems beyond 48-bit physical block addresses are invalid */ + if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) || + (pend >> sbi->blkszbits) >= BIT_ULL(48))) + return -EFSCORRUPTED; return 0; } From 873f10cf8e4d59605bc38fa1051dea8ee56fe3be Mon Sep 17 00:00:00 2001 From: Fangyu Yu Date: Thu, 16 Oct 2025 09:26:59 +0800 Subject: [PATCH 0114/1075] RISC-V: KVM: Read HGEIP CSR on the correct cpu When executing kvm_riscv_vcpu_aia_has_interrupts, the vCPU may have migrated and the IMSIC VS-file have not been updated yet, currently the HGEIP CSR should be read from the imsic->vsfile_cpu ( the pCPU before migration ) via on_each_cpu_mask, but this will trigger an IPI call and repeated IPI within a period of time is expensive in a many-core systems. Just let the vCPU execute and update the correct IMSIC VS-file via kvm_riscv_vcpu_aia_imsic_update may be a simple solution. Fixes: 4cec89db80ba ("RISC-V: KVM: Move HGEI[E|P] CSR access to IMSIC virtualization") Signed-off-by: Fangyu Yu Reviewed-by: Guo Ren Reviewed-by: Anup Patel Tested-by: Anup Patel Link: https://lore.kernel.org/r/20251016012659.82998-1-fangyu.yu@linux.alibaba.com Signed-off-by: Anup Patel --- arch/riscv/kvm/aia_imsic.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index fda0346f0ea1..11422cb95a64 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -689,8 +689,20 @@ bool kvm_riscv_vcpu_aia_imsic_has_interrupt(struct kvm_vcpu *vcpu) */ read_lock_irqsave(&imsic->vsfile_lock, flags); - if (imsic->vsfile_cpu > -1) - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + if (imsic->vsfile_cpu > -1) { + /* + * This function is typically called from kvm_vcpu_block() via + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() + * can be preempted and the blocking VCPU might resume on a + * different CPU. This means it is possible that current CPU + * does not match the imsic->vsfile_cpu hence this function + * must check imsic->vsfile_cpu before accessing HGEIP CSR. + */ + if (imsic->vsfile_cpu != vcpu->cpu) + ret = true; + else + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); + } read_unlock_irqrestore(&imsic->vsfile_lock, flags); return ret; From 74b84d1be0220b99405c16a4a3e1e503e3bd8387 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 7 Oct 2025 11:43:12 +0200 Subject: [PATCH 0115/1075] driver core: fw_devlink: Don't warn about sync_state() pending Due to the wider deployment of the ->sync_state() support, for PM domains for example, we are receiving reports about the sync_state() pending message that is being logged in fw_devlink_dev_sync_state(). In particular as it's printed at the warning level, which is questionable. Even if it certainly is useful to know that the ->sync_state() condition could not be met, there may be nothing wrong with it. For example, a driver may be built as module and are still waiting to be initialized/probed. For this reason let's move to the info level for now. Reported-by: Geert Uytterhoeven Reported-by: Sebin Francis Reported-by: Diederik de Haas Reported-by: Jon Hunter Reviewed-by: Tomi Valkeinen Signed-off-by: Ulf Hansson Reviewed-by: Dhruva Gole Reviewed-by: Sebastian Reichel Reviewed-by: Kevin Hilman Acked-by: Saravana Kannan Reviewed-by: Sebin Francis Tested-by: Sebin Francis Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3c533dab8fa5..f69dc9c85954 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1784,7 +1784,7 @@ static int fw_devlink_dev_sync_state(struct device *dev, void *data) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { - dev_warn(sup, "sync_state() pending due to %s\n", + dev_info(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } From a91c8096590bd7801a26454789f2992094fe36da Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 23 Jul 2025 16:24:16 +0200 Subject: [PATCH 0116/1075] devcoredump: Fix circular locking dependency with devcd->mutex. The original code causes a circular locking dependency found by lockdep. ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U ------------------------------------------------------ xe_fault_inject/5091 is trying to acquire lock: ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660 but task is already holding lock: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&devcd->mutex){+.+.}-{3:3}: mutex_lock_nested+0x4e/0xc0 devcd_data_write+0x27/0x90 sysfs_kf_bin_write+0x80/0xf0 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (kn->active#236){++++}-{0:0}: kernfs_drain+0x1e2/0x200 __kernfs_remove+0xae/0x400 kernfs_remove_by_name_ns+0x5d/0xc0 remove_files+0x54/0x70 sysfs_remove_group+0x3d/0xa0 sysfs_remove_groups+0x2e/0x60 device_remove_attrs+0xc7/0x100 device_del+0x15d/0x3b0 devcd_del+0x19/0x30 process_one_work+0x22b/0x6f0 worker_thread+0x1e8/0x3d0 kthread+0x11c/0x250 ret_from_fork+0x26c/0x2e0 ret_from_fork_asm+0x1a/0x30 -> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}: __lock_acquire+0x1661/0x2860 lock_acquire+0xc4/0x2f0 __flush_work+0x27a/0x660 flush_delayed_work+0x5d/0xa0 dev_coredump_put+0x63/0xa0 xe_driver_devcoredump_fini+0x12/0x20 [xe] devm_action_release+0x12/0x30 release_nodes+0x3a/0x120 devres_release_all+0x8a/0xd0 device_unbind_cleanup+0x12/0x80 device_release_driver_internal+0x23a/0x280 device_driver_detach+0x14/0x20 unbind_store+0xaf/0xc0 drv_attr_store+0x21/0x50 sysfs_kf_write+0x4a/0x80 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&devcd->mutex); lock(kn->active#236); lock(&devcd->mutex); lock((work_completion)(&(&devcd->del_wk)->work)); *** DEADLOCK *** 5 locks held by xe_fault_inject/5091: #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0 #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220 #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280 #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660 stack backtrace: CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)} Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021 Call Trace: dump_stack_lvl+0x91/0xf0 dump_stack+0x10/0x20 print_circular_bug+0x285/0x360 check_noncircular+0x135/0x150 ? register_lock_class+0x48/0x4a0 __lock_acquire+0x1661/0x2860 lock_acquire+0xc4/0x2f0 ? __flush_work+0x25d/0x660 ? mark_held_locks+0x46/0x90 ? __flush_work+0x25d/0x660 __flush_work+0x27a/0x660 ? __flush_work+0x25d/0x660 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_wq_barrier_func+0x10/0x10 flush_delayed_work+0x5d/0xa0 dev_coredump_put+0x63/0xa0 xe_driver_devcoredump_fini+0x12/0x20 [xe] devm_action_release+0x12/0x30 release_nodes+0x3a/0x120 devres_release_all+0x8a/0xd0 device_unbind_cleanup+0x12/0x80 device_release_driver_internal+0x23a/0x280 ? bus_find_device+0xa8/0xe0 device_driver_detach+0x14/0x20 unbind_store+0xaf/0xc0 drv_attr_store+0x21/0x50 sysfs_kf_write+0x4a/0x80 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 ? __f_unlock_pos+0x15/0x20 ? __x64_sys_getdents64+0x9b/0x130 ? __pfx_filldir64+0x10/0x10 ? do_syscall_64+0x1a2/0xb60 ? clear_bhb_loop+0x30/0x80 ? clear_bhb_loop+0x30/0x80 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x76e292edd574 Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574 RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063 R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000 xe 0000:03:00.0: [drm] Xe device coredump has been deleted. Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work") Cc: Mukesh Ojha Cc: Greg Kroah-Hartman Cc: Johannes Berg Cc: Rafael J. Wysocki Cc: Danilo Krummrich Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Maarten Lankhorst Cc: Matthew Brost Acked-by: Mukesh Ojha Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se Signed-off-by: Greg Kroah-Hartman --- drivers/base/devcoredump.c | 136 ++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 37faf6156d7c..55bdc7f5e59d 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -23,50 +23,46 @@ struct devcd_entry { void *data; size_t datalen; /* - * Here, mutex is required to serialize the calls to del_wk work between - * user/kernel space which happens when devcd is added with device_add() - * and that sends uevent to user space. User space reads the uevents, - * and calls to devcd_data_write() which try to modify the work which is - * not even initialized/queued from devcoredump. + * There are 2 races for which mutex is required. * + * The first race is between device creation and userspace writing to + * schedule immediately destruction. * + * This race is handled by arming the timer before device creation, but + * when device creation fails the timer still exists. * - * cpu0(X) cpu1(Y) + * To solve this, hold the mutex during device_add(), and set + * init_completed on success before releasing the mutex. * - * dev_coredump() uevent sent to user space - * device_add() ======================> user space process Y reads the - * uevents writes to devcd fd - * which results into writes to + * That way the timer will never fire until device_add() is called, + * it will do nothing if init_completed is not set. The timer is also + * cancelled in that case. * - * devcd_data_write() - * mod_delayed_work() - * try_to_grab_pending() - * timer_delete() - * debug_assert_init() - * INIT_DELAYED_WORK() - * schedule_delayed_work() - * - * - * Also, mutex alone would not be enough to avoid scheduling of - * del_wk work after it get flush from a call to devcd_free() - * mentioned as below. - * - * disabled_store() - * devcd_free() - * mutex_lock() devcd_data_write() - * flush_delayed_work() - * mutex_unlock() - * mutex_lock() - * mod_delayed_work() - * mutex_unlock() - * So, delete_work flag is required. + * The second race involves multiple parallel invocations of devcd_free(), + * add a deleted flag so only 1 can call the destructor. */ struct mutex mutex; - bool delete_work; + bool init_completed, deleted; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); void (*free)(void *data); + /* + * If nothing interferes and device_add() was returns success, + * del_wk will destroy the device after the timer fires. + * + * Multiple userspace processes can interfere in the working of the timer: + * - Writing to the coredump will reschedule the timer to run immediately, + * if still armed. + * + * This is handled by using "if (cancel_delayed_work()) { + * schedule_delayed_work() }", to prevent re-arming after having + * been previously fired. + * - Writing to /sys/class/devcoredump/disabled will destroy the + * coredump synchronously. + * This is handled by using disable_delayed_work_sync(), and then + * checking if deleted flag is set with &devcd->mutex held. + */ struct delayed_work del_wk; struct device *failing_dev; }; @@ -95,14 +91,27 @@ static void devcd_dev_release(struct device *dev) kfree(devcd); } +static void __devcd_del(struct devcd_entry *devcd) +{ + devcd->deleted = true; + device_del(&devcd->devcd_dev); + put_device(&devcd->devcd_dev); +} + static void devcd_del(struct work_struct *wk) { struct devcd_entry *devcd; + bool init_completed; devcd = container_of(wk, struct devcd_entry, del_wk.work); - device_del(&devcd->devcd_dev); - put_device(&devcd->devcd_dev); + /* devcd->mutex serializes against dev_coredumpm_timeout */ + mutex_lock(&devcd->mutex); + init_completed = devcd->init_completed; + mutex_unlock(&devcd->mutex); + + if (init_completed) + __devcd_del(devcd); } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, @@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mutex_lock(&devcd->mutex); - if (!devcd->delete_work) { - devcd->delete_work = true; - mod_delayed_work(system_wq, &devcd->del_wk, 0); - } - mutex_unlock(&devcd->mutex); + /* + * Although it's tempting to use mod_delayed work here, + * that will cause a reschedule if the timer already fired. + */ + if (cancel_delayed_work(&devcd->del_wk)) + schedule_delayed_work(&devcd->del_wk, 0); return count; } @@ -151,11 +160,21 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); + /* + * To prevent a race with devcd_data_write(), disable work and + * complete manually instead. + * + * We cannot rely on the return value of + * disable_delayed_work_sync() here, because it might be in the + * middle of a cancel_delayed_work + schedule_delayed_work pair. + * + * devcd->mutex here guards against multiple parallel invocations + * of devcd_free(). + */ + disable_delayed_work_sync(&devcd->del_wk); mutex_lock(&devcd->mutex); - if (!devcd->delete_work) - devcd->delete_work = true; - - flush_delayed_work(&devcd->del_wk); + if (!devcd->deleted) + __devcd_del(devcd); mutex_unlock(&devcd->mutex); return 0; } @@ -179,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri * put_device() <- last reference * error = fn(dev, data) devcd_dev_release() * devcd_free(dev, data) kfree(devcd) - * mutex_lock(&devcd->mutex); * * * In the above diagram, it looks like disabled_store() would be racing with parallelly - * running devcd_del() and result in memory abort while acquiring devcd->mutex which - * is called after kfree of devcd memory after dropping its last reference with + * running devcd_del() and result in memory abort after dropping its last reference with * put_device(). However, this will not happens as fn(dev, data) runs * with its own reference to device via klist_node so it is not its last reference. * so, above situation would not occur. @@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); - devcd->delete_work = false; + devcd->deleted = false; mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); @@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; - mutex_lock(&devcd->mutex); dev_set_uevent_suppress(&devcd->devcd_dev, true); + + /* devcd->mutex prevents devcd_del() completing until init finishes */ + mutex_lock(&devcd->mutex); + devcd->init_completed = false; + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, timeout); + if (device_add(&devcd->devcd_dev)) goto put_device; @@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, timeout); + + /* + * Safe to run devcd_del() now that we are done with devcd_dev. + * Alternatively we could have taken a ref on devcd_dev before + * dropping the lock. + */ + devcd->init_completed = true; mutex_unlock(&devcd->mutex); return; put_device: - put_device(&devcd->devcd_dev); mutex_unlock(&devcd->mutex); + cancel_delayed_work_sync(&devcd->del_wk); + put_device(&devcd->devcd_dev); + put_module: module_put(owner); free: From c7fbb8218b4ad35fec0bd2256d2b9c8d60331f33 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 16 Oct 2025 12:14:56 +0200 Subject: [PATCH 0117/1075] sysfs: check visibility before changing group attribute ownership Since commit 0c17270f9b92 ("net: sysfs: Implement is_visible for phys_(port_id, port_name, switch_id)"), __dev_change_net_namespace() can hit WARN_ON() when trying to change owner of a file that isn't visible. See the trace below: WARNING: CPU: 6 PID: 2938 at net/core/dev.c:12410 __dev_change_net_namespace+0xb89/0xc30 CPU: 6 UID: 0 PID: 2938 Comm: incusd Not tainted 6.17.1-1-mainline #1 PREEMPT(full) 4b783b4a638669fb644857f484487d17cb45ed1f Hardware name: Framework Laptop 13 (AMD Ryzen 7040Series)/FRANMDCP07, BIOS 03.07 02/19/2025 RIP: 0010:__dev_change_net_namespace+0xb89/0xc30 [...] Call Trace: ? if6_seq_show+0x30/0x50 do_setlink.isra.0+0xc7/0x1270 ? __nla_validate_parse+0x5c/0xcc0 ? security_capable+0x94/0x1a0 rtnl_newlink+0x858/0xc20 ? update_curr+0x8e/0x1c0 ? update_entity_lag+0x71/0x80 ? sched_balance_newidle+0x358/0x450 ? psi_task_switch+0x113/0x2a0 ? __pfx_rtnl_newlink+0x10/0x10 rtnetlink_rcv_msg+0x346/0x3e0 ? sched_clock+0x10/0x30 ? __pfx_rtnetlink_rcv_msg+0x10/0x10 netlink_rcv_skb+0x59/0x110 netlink_unicast+0x285/0x3c0 ? __alloc_skb+0xdb/0x1a0 netlink_sendmsg+0x20d/0x430 ____sys_sendmsg+0x39f/0x3d0 ? import_iovec+0x2f/0x40 ___sys_sendmsg+0x99/0xe0 __sys_sendmsg+0x8a/0xf0 do_syscall_64+0x81/0x970 ? __sys_bind+0xe3/0x110 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? sock_alloc_file+0x63/0xc0 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? alloc_fd+0x12e/0x190 ? put_unused_fd+0x2a/0x70 ? do_sys_openat2+0xa2/0xe0 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? exc_page_fault+0x7e/0x1a0 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] Fix this by checking is_visible() before trying to touch the attribute. Fixes: 303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()") Fixes: 0c17270f9b92 ("net: sysfs: Implement is_visible for phys_(port_id, port_name, switch_id)") Reported-by: Cynthia Closes: https://lore.kernel.org/netdev/01070199e22de7f8-28f711ab-d3f1-46d9-b9a0-048ab05eb09b-000000@eu-central-1.amazonses.com/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20251016101456.4087-1-fmancera@suse.de Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2d78e94072a0..e142bac4f9f8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -498,17 +498,26 @@ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, } EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); -static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, +static int sysfs_group_attrs_change_owner(struct kobject *kobj, + struct kernfs_node *grp_kn, const struct attribute_group *grp, struct iattr *newattrs) { struct kernfs_node *kn; - int error; + int error, i; + umode_t mode; if (grp->attrs) { struct attribute *const *attr; - for (attr = grp->attrs; *attr; attr++) { + for (i = 0, attr = grp->attrs; *attr; i++, attr++) { + if (grp->is_visible) { + mode = grp->is_visible(kobj, *attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*attr)->name); if (!kn) return -ENOENT; @@ -523,7 +532,14 @@ static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, if (grp->bin_attrs) { const struct bin_attribute *const *bin_attr; - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); if (!kn) return -ENOENT; @@ -573,7 +589,7 @@ int sysfs_group_change_owner(struct kobject *kobj, error = kernfs_setattr(grp_kn, &newattrs); if (!error) - error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs); + error = sysfs_group_attrs_change_owner(kobj, grp_kn, grp, &newattrs); kernfs_put(grp_kn); From 1f1d3e1d094db732d22b892227bf1e1ac3a8ca04 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 17 Oct 2025 00:57:54 +0200 Subject: [PATCH 0118/1075] rust: bitmap: fix formatting We do our best to keep the repository `rustfmt`-clean, thus run the tool to fix the formatting issue. Link: https://docs.kernel.org/rust/coding-guidelines.html#style-formatting Link: https://rust-for-linux.com/contributing#submit-checklist-addendum Fixes: 0f5878834d6c ("rust: bitmap: clean Rust 1.92.0 `unused_unsafe` warning") Reviewed-by: Burak Emir Signed-off-by: Miguel Ojeda --- rust/kernel/bitmap.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/kernel/bitmap.rs b/rust/kernel/bitmap.rs index 711b8368b38f..aa8fc7bf06fc 100644 --- a/rust/kernel/bitmap.rs +++ b/rust/kernel/bitmap.rs @@ -167,7 +167,9 @@ fn deref(&self) -> &Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } @@ -184,7 +186,9 @@ fn deref_mut(&mut self) -> &mut Bitmap { let ptr = if self.nbits <= BITS_PER_LONG { // SAFETY: Bitmap is represented inline. #[allow(unused_unsafe, reason = "Safe since Rust 1.92.0")] - unsafe { core::ptr::addr_of_mut!(self.repr.bitmap) } + unsafe { + core::ptr::addr_of_mut!(self.repr.bitmap) + } } else { // SAFETY: Bitmap is represented as array of `unsigned long`. unsafe { self.repr.ptr.as_ptr() } From 4eabd0d8791eaf9a7b114ccbf56eb488aefe7b1f Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Fri, 17 Oct 2025 11:29:22 +0100 Subject: [PATCH 0119/1075] drm/panthor: Fix kernel panic on partial unmap of a GPU VA region This commit address a kernel panic issue that can happen if Userspace tries to partially unmap a GPU virtual region (aka drm_gpuva). The VM_BIND interface allows partial unmapping of a BO. Panthor driver pre-allocates memory for the new drm_gpuva structures that would be needed for the map/unmap operation, done using drm_gpuvm layer. It expected that only one new drm_gpuva would be needed on umap but a partial unmap can require 2 new drm_gpuva and that's why it ended up doing a NULL pointer dereference causing a kernel panic. Following dump was seen when partial unmap was exercised. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000078 Mem abort info: ESR = 0x0000000096000046 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000046, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000088a863000 [000000000000078] pgd=080000088a842003, p4d=080000088a842003, pud=0800000884bf5003, pmd=0000000000000000 Internal error: Oops: 0000000096000046 [#1] PREEMPT SMP pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : panthor_gpuva_sm_step_remap+0xe4/0x330 [panthor] lr : panthor_gpuva_sm_step_remap+0x6c/0x330 [panthor] sp : ffff800085d43970 x29: ffff800085d43970 x28: ffff00080363e440 x27: ffff0008090c6000 x26: 0000000000000030 x25: ffff800085d439f8 x24: ffff00080d402000 x23: ffff800085d43b60 x22: ffff800085d439e0 x21: ffff00080abdb180 x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000010 x17: 6e656c202c303030 x16: 3666666666646466 x15: 393d61766f69202c x14: 312d3d7361203a70 x13: 303030323d6e656c x12: ffff80008324bf58 x11: 0000000000000003 x10: 0000000000000002 x9 : ffff8000801a6a9c x8 : ffff00080360b300 x7 : 0000000000000000 x6 : 000000088aa35fc7 x5 : fff1000080000000 x4 : ffff8000842ddd30 x3 : 0000000000000001 x2 : 0000000100000000 x1 : 0000000000000001 x0 : 0000000000000078 Call trace: panthor_gpuva_sm_step_remap+0xe4/0x330 [panthor] op_remap_cb.isra.22+0x50/0x80 __drm_gpuvm_sm_unmap+0x10c/0x1c8 drm_gpuvm_sm_unmap+0x40/0x60 panthor_vm_exec_op+0xb4/0x3d0 [panthor] panthor_vm_bind_exec_sync_op+0x154/0x278 [panthor] panthor_ioctl_vm_bind+0x160/0x4a0 [panthor] drm_ioctl_kernel+0xbc/0x138 drm_ioctl+0x240/0x500 __arm64_sys_ioctl+0xb0/0xf8 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.1+0x98/0xf8 do_el0_svc+0x24/0x38 el0_svc+0x40/0xf8 el0t_64_sync_handler+0xa0/0xc8 el0t_64_sync+0x174/0x178 Signed-off-by: Akash Goel Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20251017102922.670084-1-akash.goel@arm.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 6dec4354e378..7870e7dbaa5d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1175,10 +1175,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) break; case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: - /* Partial unmaps might trigger a remap with either a prev or a next VA, - * but not both. + /* Two VMAs can be needed for an unmap, as an unmap can happen + * in the middle of a drm_gpuva, requiring a remap with both + * prev & next VA. Or an unmap can span more than one drm_gpuva + * where the first and last ones are covered partially, requring + * a remap for the first with a prev VA and remap for the last + * with a next VA. */ - vma_count = 1; + vma_count = 2; break; default: From e433110eb5bf067f74d3d15c5fb252206c66ae0b Mon Sep 17 00:00:00 2001 From: Inochi Amaoto Date: Tue, 14 Oct 2025 09:46:07 +0800 Subject: [PATCH 0120/1075] PCI: vmd: Override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") set callback irq_startup() and irq_shutdown() of the struct pci_msi[x]_template, __irq_startup() will always invokes irq_startup() callback instead of irq_enable() callback overridden in vmd_init_dev_msi_info(). This will not start the IRQ correctly. Also override irq_startup()/irq_shutdown() in vmd_init_dev_msi_info(), so the irq_startup() can invoke the real logic. Fixes: 54f45a30c0d0 ("PCI/MSI: Add startup/shutdown for per device domains") Reported-by: Kenneth Crudup Closes: https://lore.kernel.org/r/8a923590-5b3a-406f-a324-7bd1cf894d8f@panix.com/ Reported-by: Genes Lists Closes: https://lore.kernel.org/r/4b392af8847cc19720ffcd53865f60ab3edc56b3.camel@sapience.com Reported-by: Todd Brandt Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220658 Reported-by: Oliver Hartkopp Closes: https://lore.kernel.org/r/8d6887a5-60bc-423c-8f7a-87b4ab739f6a@hartkopp.net Reported-by: Hervé Signed-off-by: Inochi Amaoto Signed-off-by: Bjorn Helgaas Tested-by: Kenneth R. Crudup Tested-by: Genes Lists Tested-by: Oliver Hartkopp Tested-by: Todd Brandt Tested-by: Hervé Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251014014607.612586-1-inochiama@gmail.com --- drivers/pci/controller/vmd.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index 1bd5bf4a6097..b4b62b9ccc45 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -192,6 +192,12 @@ static void vmd_pci_msi_enable(struct irq_data *data) data->chip->irq_unmask(data); } +static unsigned int vmd_pci_msi_startup(struct irq_data *data) +{ + vmd_pci_msi_enable(data); + return 0; +} + static void vmd_irq_disable(struct irq_data *data) { struct vmd_irq *vmdirq = data->chip_data; @@ -210,6 +216,11 @@ static void vmd_pci_msi_disable(struct irq_data *data) vmd_irq_disable(data->parent_data); } +static void vmd_pci_msi_shutdown(struct irq_data *data) +{ + vmd_pci_msi_disable(data); +} + static struct irq_chip vmd_msi_controller = { .name = "VMD-MSI", .irq_compose_msi_msg = vmd_compose_msi_msg, @@ -309,6 +320,8 @@ static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain, if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info)) return false; + info->chip->irq_startup = vmd_pci_msi_startup; + info->chip->irq_shutdown = vmd_pci_msi_shutdown; info->chip->irq_enable = vmd_pci_msi_enable; info->chip->irq_disable = vmd_pci_msi_disable; return true; From a78835b86a4414230e4cf9a9f16d22302cdb8388 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Mon, 13 Oct 2025 17:08:26 -0500 Subject: [PATCH 0121/1075] PCI/VGA: Select SCREEN_INFO on X86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") introduced an implicit dependency upon SCREEN_INFO by removing the open coded implementation. If a user didn't have CONFIG_SCREEN_INFO set, vga_is_firmware_default() would now return false. SCREEN_INFO is only used on X86 so add a conditional select for SCREEN_INFO to ensure that the VGA arbiter works as intended. Fixes: 337bf13aa9dda ("PCI/VGA: Replace vga_is_firmware_default() with a screen info check") Reported-by: Eric Biggers Closes: https://lore.kernel.org/linux-pci/20251012182302.GA3412@sol/ Suggested-by: Thomas Zimmermann Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Zimmermann Reviewed-by: Ilpo Järvinen Tested-by: Eric Biggers Link: https://patch.msgid.link/20251013220829.1536292-1-superm1@kernel.org --- drivers/pci/Kconfig | 1 + drivers/pci/vgaarb.c | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7065a8e5f9b1..f94f5d384362 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -306,6 +306,7 @@ config VGA_ARB bool "VGA Arbitration" if EXPERT default y depends on (PCI && !S390) + select SCREEN_INFO if X86 help Some "legacy" VGA devices implemented on PCI typically have the same hard-decoded addresses as they did on ISA. When multiple PCI devices diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index b58f94ee4891..436fa7f4c387 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,10 +556,8 @@ EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#ifdef CONFIG_SCREEN_INFO - struct screen_info *si = &screen_info; - - return pdev == screen_info_pci_dev(si); +#if defined CONFIG_X86 + return pdev == screen_info_pci_dev(&screen_info); #else return false; #endif From 0fbbcab7f9082cdc233da5e5e353f69830f11956 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Oct 2025 00:07:42 -0700 Subject: [PATCH 0122/1075] cgroup/misc: fix misc_res_type kernel-doc warning Format the kernel-doc for SCALE_HW_CALIB_INVALID correctly to avoid a kernel-doc warning: Warning: include/linux/misc_cgroup.h:26 Enum value 'MISC_CG_RES_TDX' not described in enum 'misc_res_type' Fixes: 7c035bea9407 ("KVM: TDX: Register TDX host key IDs to cgroup misc controller") Signed-off-by: Randy Dunlap Signed-off-by: Tejun Heo --- include/linux/misc_cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h index 71cf5bfc6349..0cb36a3ffc47 100644 --- a/include/linux/misc_cgroup.h +++ b/include/linux/misc_cgroup.h @@ -19,7 +19,7 @@ enum misc_res_type { MISC_CG_RES_SEV_ES, #endif #ifdef CONFIG_INTEL_TDX_HOST - /* Intel TDX HKIDs resource */ + /** @MISC_CG_RES_TDX: Intel TDX HKIDs resource */ MISC_CG_RES_TDX, #endif /** @MISC_CG_RES_TYPES: count of enum misc_res_type constants */ From 17679ac6df6c4830ba711835aa8cf961be36cfa1 Mon Sep 17 00:00:00 2001 From: Dewei Meng Date: Thu, 16 Oct 2025 14:10:11 +0800 Subject: [PATCH 0123/1075] btrfs: directly free partially initialized fs_info in btrfs_check_leaked_roots() If fs_info->super_copy or fs_info->super_for_commit allocated failed in btrfs_get_tree_subvol(), then no need to call btrfs_free_fs_info(). Otherwise btrfs_check_leaked_roots() would access NULL pointer because fs_info->allocated_roots had not been initialised. syzkaller reported the following information: ------------[ cut here ]------------ BUG: unable to handle page fault for address: fffffffffffffbb0 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 64c9067 P4D 64c9067 PUD 64cb067 PMD 0 Oops: Oops: 0000 [#1] SMP KASAN PTI CPU: 0 UID: 0 PID: 1402 Comm: syz.1.35 Not tainted 6.15.8 #4 PREEMPT(lazy) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), (...) RIP: 0010:arch_atomic_read arch/x86/include/asm/atomic.h:23 [inline] RIP: 0010:raw_atomic_read include/linux/atomic/atomic-arch-fallback.h:457 [inline] RIP: 0010:atomic_read include/linux/atomic/atomic-instrumented.h:33 [inline] RIP: 0010:refcount_read include/linux/refcount.h:170 [inline] RIP: 0010:btrfs_check_leaked_roots+0x18f/0x2c0 fs/btrfs/disk-io.c:1230 [...] Call Trace: btrfs_free_fs_info+0x310/0x410 fs/btrfs/disk-io.c:1280 btrfs_get_tree_subvol+0x592/0x6b0 fs/btrfs/super.c:2029 btrfs_get_tree+0x63/0x80 fs/btrfs/super.c:2097 vfs_get_tree+0x98/0x320 fs/super.c:1759 do_new_mount+0x357/0x660 fs/namespace.c:3899 path_mount+0x716/0x19c0 fs/namespace.c:4226 do_mount fs/namespace.c:4239 [inline] __do_sys_mount fs/namespace.c:4450 [inline] __se_sys_mount fs/namespace.c:4427 [inline] __x64_sys_mount+0x28c/0x310 fs/namespace.c:4427 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x92/0x180 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f032eaffa8d [...] Fixes: 3bb17a25bcb0 ("btrfs: add get_tree callback for new mount API") CC: stable@vger.kernel.org # 6.12+ Reviewed-by: Daniel Vacek Reviewed-by: Qu Wenruo Signed-off-by: Dewei Meng Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index aadc02374b2a..430e7419349c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2068,7 +2068,13 @@ static int btrfs_get_tree_subvol(struct fs_context *fc) fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { - btrfs_free_fs_info(fs_info); + /* + * Dont call btrfs_free_fs_info() to free it as it's still + * initialized partially. + */ + kfree(fs_info->super_copy); + kfree(fs_info->super_for_commit); + kvfree(fs_info); return -ENOMEM; } btrfs_init_fs_info(fs_info); From 1fabe43b4e1a97597ec5d5ffcd2b7cf96e654b8f Mon Sep 17 00:00:00 2001 From: Ting-Chang Hou Date: Thu, 16 Oct 2025 15:53:51 +0800 Subject: [PATCH 0124/1075] btrfs: send: fix duplicated rmdir operations when using extrefs Commit 29d6d30f5c8a ("Btrfs: send, don't send rmdir for same target multiple times") has fixed an issue that a send stream contained a rmdir operation for the same directory multiple times. After that fix we keep track of the last directory for which we sent a rmdir operation and compare with it before sending a rmdir for the parent inode of a deleted hardlink we are processing. But there is still a corner case that in between rmdir dir operations for the same inode we find deleted hardlinks for other parent inodes, so tracking just the last inode for which we sent a rmdir operation is not enough. Hardlinks of a file in the same directory are stored in the same INODE_REF item, but if the number of hardlinks is too large and can not fit in a leaf, we use INODE_EXTREF items to store them. The key of an INODE_EXTREF item is (inode_id, INODE_EXTREF, hash[name, parent ino]), so between two hardlinks for the same parent directory, we can find others for other parent directories. For example for the reproducer below we get the following (from a btrfs inspect-internal dump-tree output): item 0 key (259 INODE_EXTREF 2309449) itemoff 16257 itemsize 26 index 6925 parent 257 namelen 8 name: foo.6923 item 1 key (259 INODE_EXTREF 2311350) itemoff 16231 itemsize 26 index 6588 parent 258 namelen 8 name: foo.6587 item 2 key (259 INODE_EXTREF 2457395) itemoff 16205 itemsize 26 index 6611 parent 257 namelen 8 name: foo.6609 (...) So tracking the last directory's inode number does not work in this case since we process a link for parent inode 257, then for 258 and then back again for 257, and that second time we process a deleted link for 257 we think we have not yet sent a rmdir operation. Fix this by using a rbtree to keep track of all the directories for which we have already sent rmdir operations, and add those directories to the 'check_dirs' ref list in process_recorded_refs() only if the directory is not yet in the rbtree, otherwise skip it since it means we have already sent a rmdir operation for that directory. The following test script reproduces the problem: $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV mount $DEV $MNT mkdir $MNT/a $MNT/b echo 123 > $MNT/a/foo for ((i = 1; i <= 1000; i++)); do ln $MNT/a/foo $MNT/a/foo.$i ln $MNT/a/foo $MNT/b/foo.$i done btrfs subvolume snapshot -r $MNT $MNT/snap1 btrfs send $MNT/snap1 -f /tmp/base.send rm -r $MNT/a $MNT/b btrfs subvolume snapshot -r $MNT $MNT/snap2 btrfs send -p $MNT/snap1 $MNT/snap2 -f /tmp/incremental.send umount $MNT mkfs.btrfs -f $DEV mount $DEV $MNT btrfs receive $MNT -f /tmp/base.send btrfs receive $MNT -f /tmp/incremental.send rm -f /tmp/base.send /tmp/incremental.send umount $MNT When running it, it fails like this: $ ./test.sh (...) At subvol snap1 At snapshot snap2 ERROR: rmdir o257-9-0 failed: No such file or directory CC: Reviewed-by: Filipe Manana Signed-off-by: Ting-Chang Hou [ Updated changelog ] Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/send.c | 56 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6144e66661f5..96a030d28e09 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4102,6 +4102,48 @@ static int refresh_ref_path(struct send_ctx *sctx, struct recorded_ref *ref) return ret; } +static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node) +{ + const struct recorded_ref *data = k; + const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node); + + if (data->dir > ref->dir) + return 1; + if (data->dir < ref->dir) + return -1; + if (data->dir_gen > ref->dir_gen) + return 1; + if (data->dir_gen < ref->dir_gen) + return -1; + return 0; +} + +static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent) +{ + const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node); + + return rbtree_check_dir_ref_comp(entry, parent) < 0; +} + +static int record_check_dir_ref_in_tree(struct rb_root *root, + struct recorded_ref *ref, struct list_head *list) +{ + struct recorded_ref *tmp_ref; + int ret; + + if (rb_find(ref, root, rbtree_check_dir_ref_comp)) + return 0; + + ret = dup_ref(ref, list); + if (ret < 0) + return ret; + + tmp_ref = list_last_entry(list, struct recorded_ref, list); + rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less); + tmp_ref->root = root; + return 0; +} + static int rename_current_inode(struct send_ctx *sctx, struct fs_path *current_path, struct fs_path *new_path) @@ -4129,11 +4171,11 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) struct recorded_ref *cur; struct recorded_ref *cur2; LIST_HEAD(check_dirs); + struct rb_root rbtree_check_dirs = RB_ROOT; struct fs_path *valid_path = NULL; u64 ow_inode = 0; u64 ow_gen; u64 ow_mode; - u64 last_dir_ino_rm = 0; bool did_overwrite = false; bool is_orphan = false; bool can_rename = true; @@ -4437,7 +4479,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) goto out; } } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4465,7 +4507,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) } list_for_each_entry(cur, &sctx->deleted_refs, list) { - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4475,7 +4517,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) * We have a moved dir. Add the old parent to check_dirs */ cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list); - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } else if (!S_ISDIR(sctx->cur_inode_mode)) { @@ -4509,7 +4551,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) if (is_current_inode_path(sctx, cur->full_path)) fs_path_reset(&sctx->cur_inode_path); } - ret = dup_ref(cur, &check_dirs); + ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs); if (ret < 0) goto out; } @@ -4552,8 +4594,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; - } else if (ret == inode_state_did_delete && - cur->dir != last_dir_ino_rm) { + } else if (ret == inode_state_did_delete) { ret = can_rmdir(sctx, cur->dir, cur->dir_gen); if (ret < 0) goto out; @@ -4565,7 +4606,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) ret = send_rmdir(sctx, valid_path); if (ret < 0) goto out; - last_dir_ino_rm = cur->dir; } } } From cfec502b3d091ff7c24df6ccf8079470584315a0 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 16 Oct 2025 15:31:44 +0200 Subject: [PATCH 0125/1075] rust: device: fix device context of Device::parent() Regardless of the DeviceContext of a device, we can't give any guarantees about the DeviceContext of its parent device. This is very subtle, since it's only caused by a simple typo, i.e. Self::from_raw(parent) which preserves the DeviceContext in this case, vs. Device::from_raw(parent) which discards the DeviceContext. (I should have noticed it doing the correct thing in auxiliary::Device subsequently, but somehow missed it.) Hence, fix both Device::parent() and auxiliary::Device::parent(). Cc: stable@vger.kernel.org Fixes: a4c9f71e3440 ("rust: device: implement Device::parent()") Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Acked-by: Greg Kroah-Hartman Signed-off-by: Danilo Krummrich --- rust/kernel/auxiliary.rs | 8 +------- rust/kernel/device.rs | 4 ++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs index e11848bbf206..7a3b0b9c418e 100644 --- a/rust/kernel/auxiliary.rs +++ b/rust/kernel/auxiliary.rs @@ -217,13 +217,7 @@ pub fn id(&self) -> u32 { /// Returns a reference to the parent [`device::Device`], if any. pub fn parent(&self) -> Option<&device::Device> { - let ptr: *const Self = self; - // CAST: `Device` types are transparent to each other. - let ptr: *const Device = ptr.cast(); - // SAFETY: `ptr` was derived from `&self`. - let this = unsafe { &*ptr }; - - this.as_ref().parent() + self.as_ref().parent() } } diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 1321e6f0b53c..a849b7dde2fd 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -251,7 +251,7 @@ pub(crate) fn as_raw(&self) -> *mut bindings::device { /// Returns a reference to the parent device, if any. #[cfg_attr(not(CONFIG_AUXILIARY_BUS), expect(dead_code))] - pub(crate) fn parent(&self) -> Option<&Self> { + pub(crate) fn parent(&self) -> Option<&Device> { // SAFETY: // - By the type invariant `self.as_raw()` is always valid. // - The parent device is only ever set at device creation. @@ -264,7 +264,7 @@ pub(crate) fn parent(&self) -> Option<&Self> { // - Since `parent` is not NULL, it must be a valid pointer to a `struct device`. // - `parent` is valid for the lifetime of `self`, since a `struct device` holds a // reference count of its parent. - Some(unsafe { Self::from_raw(parent) }) + Some(unsafe { Device::from_raw(parent) }) } } From 50bd33f6b3922a6b760aa30d409cae891cec8fb5 Mon Sep 17 00:00:00 2001 From: Jianpeng Chang Date: Wed, 15 Oct 2025 10:14:27 +0800 Subject: [PATCH 0126/1075] net: enetc: fix the deadlock of enetc_mdio_lock After applying the workaround for err050089, the LS1028A platform experiences RCU stalls on RT kernel. This issue is caused by the recursive acquisition of the read lock enetc_mdio_lock. Here list some of the call stacks identified under the enetc_poll path that may lead to a deadlock: enetc_poll -> enetc_lock_mdio -> enetc_clean_rx_ring OR napi_complete_done -> napi_gro_receive -> enetc_start_xmit -> enetc_lock_mdio -> enetc_map_tx_buffs -> enetc_unlock_mdio -> enetc_unlock_mdio After enetc_poll acquires the read lock, a higher-priority writer attempts to acquire the lock, causing preemption. The writer detects that a read lock is already held and is scheduled out. However, readers under enetc_poll cannot acquire the read lock again because a writer is already waiting, leading to a thread hang. Currently, the deadlock is avoided by adjusting enetc_lock_mdio to prevent recursive lock acquisition. Fixes: 6d36ecdbc441 ("net: enetc: take the MDIO lock only once per NAPI poll cycle") Signed-off-by: Jianpeng Chang Acked-by: Wei Fang Link: https://patch.msgid.link/20251015021427.180757-1-jianpeng.chang.cn@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 25 ++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index aae462a0cf5a..0535e92404e3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -1595,6 +1595,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, /* next descriptor to process */ i = rx_ring->next_to_clean; + enetc_lock_mdio(); + while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd; struct sk_buff *skb; @@ -1630,7 +1632,9 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rx_byte_cnt += skb->len + ETH_HLEN; rx_frm_cnt++; + enetc_unlock_mdio(); napi_gro_receive(napi, skb); + enetc_lock_mdio(); } rx_ring->next_to_clean = i; @@ -1638,6 +1642,8 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; + enetc_unlock_mdio(); + return rx_frm_cnt; } @@ -1947,6 +1953,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, /* next descriptor to process */ i = rx_ring->next_to_clean; + enetc_lock_mdio(); + while (likely(rx_frm_cnt < work_limit)) { union enetc_rx_bd *rxbd, *orig_rxbd; struct xdp_buff xdp_buff; @@ -2010,7 +2018,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, */ enetc_bulk_flip_buff(rx_ring, orig_i, i); + enetc_unlock_mdio(); napi_gro_receive(napi, skb); + enetc_lock_mdio(); break; case XDP_TX: tx_ring = priv->xdp_tx_ring[rx_ring->index]; @@ -2045,7 +2055,9 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, } break; case XDP_REDIRECT: + enetc_unlock_mdio(); err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog); + enetc_lock_mdio(); if (unlikely(err)) { enetc_xdp_drop(rx_ring, orig_i, i); rx_ring->stats.xdp_redirect_failures++; @@ -2065,8 +2077,11 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, rx_ring->stats.packets += rx_frm_cnt; rx_ring->stats.bytes += rx_byte_cnt; - if (xdp_redirect_frm_cnt) + if (xdp_redirect_frm_cnt) { + enetc_unlock_mdio(); xdp_do_flush(); + enetc_lock_mdio(); + } if (xdp_tx_frm_cnt) enetc_update_tx_ring_tail(tx_ring); @@ -2075,6 +2090,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) - rx_ring->xdp.xdp_tx_in_flight); + enetc_unlock_mdio(); + return rx_frm_cnt; } @@ -2093,6 +2110,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) for (i = 0; i < v->count_tx_rings; i++) if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) complete = false; + enetc_unlock_mdio(); prog = rx_ring->xdp.prog; if (prog) @@ -2104,10 +2122,8 @@ static int enetc_poll(struct napi_struct *napi, int budget) if (work_done) v->rx_napi_work = true; - if (!complete) { - enetc_unlock_mdio(); + if (!complete) return budget; - } napi_complete_done(napi, work_done); @@ -2116,6 +2132,7 @@ static int enetc_poll(struct napi_struct *napi, int budget) v->rx_napi_work = false; + enetc_lock_mdio(); /* enable interrupts */ enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); From e59bc32df2e989f034623a580e30a2a72af33b3f Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 16 Oct 2025 16:01:31 +0800 Subject: [PATCH 0127/1075] net: enetc: correct the value of ENETC_RXB_TRUESIZE The ENETC RX ring uses the page halves flipping mechanism, each page is split into two halves for the RX ring to use. And ENETC_RXB_TRUESIZE is defined to 2048 to indicate the size of half a page. However, the page size is configurable, for ARM64 platform, PAGE_SIZE is default to 4K, but it could be configured to 16K or 64K. When PAGE_SIZE is set to 16K or 64K, ENETC_RXB_TRUESIZE is not correct, and the RX ring will always use the first half of the page. This is not consistent with the description in the relevant kernel doc and commit messages. This issue is invisible in most cases, but if users want to increase PAGE_SIZE to receive a Jumbo frame with a single buffer for some use cases, it will not work as expected, because the buffer size of each RX BD is fixed to 2048 bytes. Based on the above two points, we expect to correct ENETC_RXB_TRUESIZE to (PAGE_SIZE >> 1), as described in the comment. Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers") Signed-off-by: Wei Fang Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20251016080131.3127122-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 0ec010a7d640..f279fa597991 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -76,7 +76,7 @@ struct enetc_lso_t { #define ENETC_LSO_MAX_DATA_LEN SZ_256K #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE -#define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */ +#define ENETC_RXB_TRUESIZE (PAGE_SIZE >> 1) #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ #define ENETC_RXB_DMA_SIZE \ (SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD) From cb74f8c952508bc85ec9583fa7da31c9b1440f26 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 16 Oct 2025 16:39:37 +0700 Subject: [PATCH 0128/1075] Documentation: net: net_failover: Separate cloud-ifupdown-helper and reattach-vf.sh code blocks marker cloud-ifupdown-helper patch and reattach-vf.sh script are rendered in htmldocs output as normal paragraphs instead of literal code blocks due to missing separator from respective code block marker. Add it. Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251016093936.29442-2-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_failover.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/net_failover.rst b/Documentation/networking/net_failover.rst index f4e1b4e07adc..2f776e90d318 100644 --- a/Documentation/networking/net_failover.rst +++ b/Documentation/networking/net_failover.rst @@ -96,9 +96,8 @@ needed to these network configuration daemons to make sure that an IP is received only on the 'failover' device. Below is the patch snippet used with 'cloud-ifupdown-helper' script found on -Debian cloud images: +Debian cloud images:: -:: @@ -27,6 +27,8 @@ do_setup() { local working="$cfgdir/.$INTERFACE" local final="$cfgdir/$INTERFACE" @@ -172,9 +171,8 @@ appropriate FDB entry is added. The following script is executed on the destination hypervisor once migration completes, and it reattaches the VF to the VM and brings down the virtio-net -interface. +interface:: -:: # reattach-vf.sh #!/bin/bash From e0caeb24f538c3c9c94f471882ceeb43d9dc2739 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 16 Oct 2025 20:51:36 +0800 Subject: [PATCH 0129/1075] net: bonding: update the slave array for broadcast mode This patch fixes ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad"). Before this commit, on the broadcast mode, all devices were traversed using the bond_for_each_slave_rcu. This patch supports traversing devices by using all_slaves. Therefore, we need to update the slave array when enslave or release slave. Fixes: ce7a381697cb ("net: bonding: add broadcast_neighbor option for 802.3ad") Cc: Simon Horman Cc: Jonathan Corbet Cc: Andrew Lunn Cc: Reported-by: Jiri Slaby Tested-by: Jiri Slaby Link: https://lore.kernel.org/all/a97e6e1e-81bc-4a79-8352-9e4794b0d2ca@kernel.org/ Signed-off-by: Tonghao Zhang Reviewed-by: Hangbin Liu Reviewed-by: Nikolay Aleksandrov Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20251016125136.16568-1-tonghao@bamaicloud.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 4da619210c1f..67fdcbdd2764 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2287,7 +2287,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, unblock_netpoll_tx(); } - if (bond_mode_can_use_xmit_hash(bond)) + /* broadcast mode uses the all_slaves to loop through slaves. */ + if (bond_mode_can_use_xmit_hash(bond) || + BOND_MODE(bond) == BOND_MODE_BROADCAST) bond_update_slave_arr(bond, NULL); if (!slave_dev->netdev_ops->ndo_bpf || @@ -2463,7 +2465,8 @@ static int __bond_release_one(struct net_device *bond_dev, bond_upper_dev_unlink(bond, slave); - if (bond_mode_can_use_xmit_hash(bond)) + if (bond_mode_can_use_xmit_hash(bond) || + BOND_MODE(bond) == BOND_MODE_BROADCAST) bond_update_slave_arr(bond, slave); slave_info(bond_dev, slave_dev, "Releasing %s interface\n", From 902e81e679d86846a2404630d349709ad9372d0d Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 16 Oct 2025 16:58:07 +0300 Subject: [PATCH 0130/1075] dpaa2-eth: fix the pointer passed to PTR_ALIGN on Tx path The blamed commit increased the needed headroom to account for alignment. This means that the size required to always align a Tx buffer was added inside the dpaa2_eth_needed_headroom() function. By doing that, a manual adjustment of the pointer passed to PTR_ALIGN() was no longer correct since the 'buffer_start' variable was already pointing to the start of the skb's memory. The behavior of the dpaa2-eth driver without this patch was to drop frames on Tx even when the headroom was matching the 128 bytes necessary. Fix this by removing the manual adjust of 'buffer_start' from the PTR_MODE call. Closes: https://lore.kernel.org/netdev/70f0dcd9-1906-4d13-82df-7bbbbe7194c6@app.fastmail.com/T/#u Fixes: f422abe3f23d ("dpaa2-eth: increase the needed headroom to account for alignment") Signed-off-by: Ioana Ciornei Tested-by: Mathew McBride Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251016135807.360978-1-ioana.ciornei@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index c96d1d6ba8fe..18d86badd6ea 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1077,8 +1077,7 @@ static int dpaa2_eth_build_single_fd(struct dpaa2_eth_priv *priv, dma_addr_t addr; buffer_start = skb->data - dpaa2_eth_needed_headroom(skb); - aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN, - DPAA2_ETH_TX_BUF_ALIGN); + aligned_start = PTR_ALIGN(buffer_start, DPAA2_ETH_TX_BUF_ALIGN); if (aligned_start >= skb->head) buffer_start = aligned_start; else From ffff5c8fc2af2218a3332b3d5b97654599d50cde Mon Sep 17 00:00:00 2001 From: Aleksander Jan Bajkowski Date: Thu, 16 Oct 2025 21:22:52 +0200 Subject: [PATCH 0131/1075] net: phy: realtek: fix rtl8221b-vm-cg name When splitting the RTL8221B-VM-CG into C22 and C45 variants, the name was accidentally changed to RTL8221B-VN-CG. This patch brings back the previous part number. Fixes: ad5ce743a6b0 ("net: phy: realtek: Add driver instances for rtl8221b via Clause 45") Signed-off-by: Aleksander Jan Bajkowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251016192325.2306757-1-olek2@wp.pl Signed-off-by: Jakub Kicinski --- drivers/net/phy/realtek/realtek_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index a724b21b4fe7..16a347084293 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -154,7 +154,7 @@ #define RTL_8211FVD_PHYID 0x001cc878 #define RTL_8221B 0x001cc840 #define RTL_8221B_VB_CG 0x001cc849 -#define RTL_8221B_VN_CG 0x001cc84a +#define RTL_8221B_VM_CG 0x001cc84a #define RTL_8251B 0x001cc862 #define RTL_8261C 0x001cc890 @@ -1523,16 +1523,16 @@ static int rtl8221b_vb_cg_c45_match_phy_device(struct phy_device *phydev, return rtlgen_is_c45_match(phydev, RTL_8221B_VB_CG, true); } -static int rtl8221b_vn_cg_c22_match_phy_device(struct phy_device *phydev, +static int rtl8221b_vm_cg_c22_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - return rtlgen_is_c45_match(phydev, RTL_8221B_VN_CG, false); + return rtlgen_is_c45_match(phydev, RTL_8221B_VM_CG, false); } -static int rtl8221b_vn_cg_c45_match_phy_device(struct phy_device *phydev, +static int rtl8221b_vm_cg_c45_match_phy_device(struct phy_device *phydev, const struct phy_driver *phydrv) { - return rtlgen_is_c45_match(phydev, RTL_8221B_VN_CG, true); + return rtlgen_is_c45_match(phydev, RTL_8221B_VM_CG, true); } static int rtl_internal_nbaset_match_phy_device(struct phy_device *phydev, @@ -1879,7 +1879,7 @@ static struct phy_driver realtek_drvs[] = { .suspend = genphy_c45_pma_suspend, .resume = rtlgen_c45_resume, }, { - .match_phy_device = rtl8221b_vn_cg_c22_match_phy_device, + .match_phy_device = rtl8221b_vm_cg_c22_match_phy_device, .name = "RTL8221B-VM-CG 2.5Gbps PHY (C22)", .probe = rtl822x_probe, .get_features = rtl822x_get_features, @@ -1892,8 +1892,8 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, }, { - .match_phy_device = rtl8221b_vn_cg_c45_match_phy_device, - .name = "RTL8221B-VN-CG 2.5Gbps PHY (C45)", + .match_phy_device = rtl8221b_vm_cg_c45_match_phy_device, + .name = "RTL8221B-VM-CG 2.5Gbps PHY (C45)", .probe = rtl822x_probe, .config_init = rtl822xb_config_init, .get_rate_matching = rtl822xb_get_rate_matching, From ca525d53f994d45c8140968b571372c45f555ac1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 17 Oct 2025 21:30:05 -0600 Subject: [PATCH 0132/1075] RISC-V: Define pgprot_dmacoherent() for non-coherent devices The pgprot_dmacoherent() is used when allocating memory for non-coherent devices and by default pgprot_dmacoherent() is same as pgprot_noncached() unless architecture overrides it. Currently, there is no pgprot_dmacoherent() definition for RISC-V hence non-coherent device memory is being mapped as IO thereby making CPU access to such memory slow. Define pgprot_dmacoherent() to be same as pgprot_writecombine() for RISC-V so that CPU access non-coherent device memory as NOCACHE which is better than accessing it as IO. Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support") Signed-off-by: Anup Patel Tested-by: Han Gao Tested-by: Guo Ren (Alibaba DAMO Academy) Link: https://lore.kernel.org/r/20250820152316.1012757-1-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 29e994a9afb6..5a08eb5fe99f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -654,6 +654,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +#define pgprot_dmacoherent pgprot_writecombine + /* * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in From e7b969cbe302d49032d4c2bb36c57c9c623ebfdc Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 13 Oct 2025 23:49:47 +0530 Subject: [PATCH 0133/1075] ACPI: RIMT: Fix unused function warnings when CONFIG_IOMMU_API is disabled When CONFIG_IOMMU_API is disabled, some functions defined outside its conditional scope become unused, triggering compiler warnings reported by the kernel test robot. Move these function definitions inside the #ifdef CONFIG_IOMMU_API block to prevent unused function warnings when the configuration is disabled. Fixes: 8f7729552582 ("ACPI: RISC-V: Add support for RIMT") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509280031.8Sjkr4bh-lkp@intel.com/ Signed-off-by: Sunil V L Link: https://lore.kernel.org/r/20251013181947.261759-1-sunilvl@ventanamicro.com --- drivers/acpi/riscv/rimt.c | 122 +++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/acpi/riscv/rimt.c b/drivers/acpi/riscv/rimt.c index 683fcfe35c31..7f423405e5ef 100644 --- a/drivers/acpi/riscv/rimt.c +++ b/drivers/acpi/riscv/rimt.c @@ -61,30 +61,6 @@ static int rimt_set_fwnode(struct acpi_rimt_node *rimt_node, return 0; } -/** - * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node - * - * @node: RIMT table node to be looked-up - * - * Returns: fwnode_handle pointer on success, NULL on failure - */ -static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) -{ - struct fwnode_handle *fwnode = NULL; - struct rimt_fwnode *curr; - - spin_lock(&rimt_fwnode_lock); - list_for_each_entry(curr, &rimt_fwnode_list, list) { - if (curr->rimt_node == node) { - fwnode = curr->fwnode; - break; - } - } - spin_unlock(&rimt_fwnode_lock); - - return fwnode; -} - static acpi_status rimt_match_node_callback(struct acpi_rimt_node *node, void *context) { @@ -202,6 +178,67 @@ static struct acpi_rimt_node *rimt_scan_node(enum acpi_rimt_node_type type, return NULL; } +/* + * RISC-V supports IOMMU as a PCI device or a platform device. + * When it is a platform device, there should be a namespace device as + * well along with RIMT. To create the link between RIMT information and + * the platform device, the IOMMU driver should register itself with the + * RIMT module. This is true for PCI based IOMMU as well. + */ +int rimt_iommu_register(struct device *dev) +{ + struct fwnode_handle *rimt_fwnode; + struct acpi_rimt_node *node; + + node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); + if (!node) { + pr_err("Could not find IOMMU node in RIMT\n"); + return -ENODEV; + } + + if (dev_is_pci(dev)) { + rimt_fwnode = acpi_alloc_fwnode_static(); + if (!rimt_fwnode) + return -ENOMEM; + + rimt_fwnode->dev = dev; + if (!dev->fwnode) + dev->fwnode = rimt_fwnode; + + rimt_set_fwnode(node, rimt_fwnode); + } else { + rimt_set_fwnode(node, dev->fwnode); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API + +/** + * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node + * + * @node: RIMT table node to be looked-up + * + * Returns: fwnode_handle pointer on success, NULL on failure + */ +static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) +{ + struct fwnode_handle *fwnode = NULL; + struct rimt_fwnode *curr; + + spin_lock(&rimt_fwnode_lock); + list_for_each_entry(curr, &rimt_fwnode_list, list) { + if (curr->rimt_node == node) { + fwnode = curr->fwnode; + break; + } + } + spin_unlock(&rimt_fwnode_lock); + + return fwnode; +} + static bool rimt_pcie_rc_supports_ats(struct acpi_rimt_node *node) { struct acpi_rimt_pcie_rc *pci_rc; @@ -290,43 +327,6 @@ static struct acpi_rimt_node *rimt_node_get_id(struct acpi_rimt_node *node, return NULL; } -/* - * RISC-V supports IOMMU as a PCI device or a platform device. - * When it is a platform device, there should be a namespace device as - * well along with RIMT. To create the link between RIMT information and - * the platform device, the IOMMU driver should register itself with the - * RIMT module. This is true for PCI based IOMMU as well. - */ -int rimt_iommu_register(struct device *dev) -{ - struct fwnode_handle *rimt_fwnode; - struct acpi_rimt_node *node; - - node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); - if (!node) { - pr_err("Could not find IOMMU node in RIMT\n"); - return -ENODEV; - } - - if (dev_is_pci(dev)) { - rimt_fwnode = acpi_alloc_fwnode_static(); - if (!rimt_fwnode) - return -ENOMEM; - - rimt_fwnode->dev = dev; - if (!dev->fwnode) - dev->fwnode = rimt_fwnode; - - rimt_set_fwnode(node, rimt_fwnode); - } else { - rimt_set_fwnode(node, dev->fwnode); - } - - return 0; -} - -#ifdef CONFIG_IOMMU_API - static struct acpi_rimt_node *rimt_node_map_id(struct acpi_rimt_node *node, u32 id_in, u32 *id_out, u8 type_mask) From 223bfc4d403c5e4841f9d2b5be88a6e236942e4e Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 17:32:05 -0700 Subject: [PATCH 0134/1075] riscv: Register IPI IRQs with unique names This allows different IPIs to be distinguished in tracing output. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251016003244.3910332-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/smp.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index e650dec44817..5ed5095320e6 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -40,6 +40,17 @@ enum ipi_message_type { IPI_MAX }; +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", +}; + unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; @@ -199,7 +210,7 @@ void riscv_ipi_set_virq_range(int virq, int nr) /* Request IPIs */ for (i = 0; i < nr_ipi; i++) { err = request_percpu_irq(ipi_virq_base + i, handle_IPI, - "IPI", &ipi_dummy_dev); + ipi_names[i], &ipi_dummy_dev); WARN_ON(err); ipi_desc[i] = irq_to_desc(ipi_virq_base + i); @@ -210,17 +221,6 @@ void riscv_ipi_set_virq_range(int virq, int nr) riscv_ipi_enable(); } -static const char * const ipi_names[] = { - [IPI_RESCHEDULE] = "Rescheduling interrupts", - [IPI_CALL_FUNC] = "Function call interrupts", - [IPI_CPU_STOP] = "CPU stop interrupts", - [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", - [IPI_IRQ_WORK] = "IRQ work interrupts", - [IPI_TIMER] = "Timer broadcast interrupts", - [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", - [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", -}; - void show_ipi_stats(struct seq_file *p, int prec) { unsigned int cpu, i; From 5898fc01ff344075e4332aa9abeb0841c85e7e51 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 16:33:24 -0700 Subject: [PATCH 0135/1075] riscv: mm: Define MAX_POSSIBLE_PHYSMEM_BITS for zsmalloc This definition is used by zsmalloc to optimize memory allocation. On riscv64, it is the same as MAX_PHYSMEM_BITS from asm/sparsemem.h, but that definition depends on CONFIG_SPARSEMEM. The correct definition is already provided for riscv32. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251015233327.3885003-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable-64.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 1018d2216901..6e789fa58514 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -69,6 +69,8 @@ typedef struct { #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) +#define MAX_POSSIBLE_PHYSMEM_BITS 56 + /* * rv64 PTE format: * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 From 768e054de01bef8701c24ec49309e57e0167af44 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 15:56:00 -0700 Subject: [PATCH 0136/1075] riscv: Remove the PER_CPU_OFFSET_SHIFT macro __per_cpu_offset is an array of unsigned long, so we can reuse the existing RISCV_LGPTR macro. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251015225604.3860409-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/asm.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 8bd2a11382a3..ac28066bb564 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -84,15 +84,9 @@ .endm #ifdef CONFIG_SMP -#ifdef CONFIG_32BIT -#define PER_CPU_OFFSET_SHIFT 2 -#else -#define PER_CPU_OFFSET_SHIFT 3 -#endif - .macro asm_per_cpu dst sym tmp lw \tmp, TASK_TI_CPU_NUM(tp) - slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + slli \tmp, \tmp, RISCV_LGPTR la \dst, __per_cpu_offset add \dst, \dst, \tmp REG_L \tmp, 0(\dst) From d2721bb165b3ee00dd23525885381af07fec852a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 14 Oct 2025 22:00:09 +0530 Subject: [PATCH 0137/1075] RISC-V: Don't print details of CPUs disabled in DT Early boot stages may disable CPU DT nodes for unavailable CPUs based on SKU, pinstraps, eFuse, etc. Currently, the riscv_early_of_processor_hartid() prints details of a CPU if it is disabled in DT which has no value and gives a false impression to the users that there some issue with the CPU. Fixes: e3d794d555cd ("riscv: treat cpu devicetree nodes without status as enabled") Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20251014163009.182381-1-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f6b13e9f5e6c..3dbc8cc557dd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -62,10 +62,8 @@ int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned lo return -ENODEV; } - if (!of_device_is_available(node)) { - pr_info("CPU with hartid=%lu is not available\n", *hart); + if (!of_device_is_available(node)) return -ENODEV; - } if (of_property_read_string(node, "riscv,isa-base", &isa)) goto old_interface; From 492c513ec6de1ce51b5f033bd6c708e4b8e46ae4 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 14 Oct 2025 17:25:26 -0600 Subject: [PATCH 0138/1075] riscv: add a forward declaration for cpuinfo_op Add a forward declaration for cpuinfo_op to resolve a sparse warning. Link: https://lore.kernel.org/r/b831f349-5d0c-f7ac-8362-acb20bc6221a@kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbd0e4306c93..62837fa981e8 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -31,6 +31,8 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +extern const struct seq_operations cpuinfo_op; + /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; From 5d15d2ad36b0f7afab83ca9fc8a2a6e60cbe54c4 Mon Sep 17 00:00:00 2001 From: Jingwei Wang Date: Mon, 11 Aug 2025 22:20:06 +0800 Subject: [PATCH 0139/1075] riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF, is determined by an asynchronous kthread. This can create a race condition where the kthread finishes after the vDSO data has already been populated, causing userspace to read stale values. To fix this race, a new 'ready' flag is added to the vDSO data, initialized to 'false' during arch_initcall_sync. This flag is checked by both the vDSO's user-space code and the riscv_hwprobe syscall. The syscall serves as a one-time gate, using a completion to wait for any pending probes before populating the data and setting the flag to 'true', thus ensuring userspace reads fresh values on its first request. Reported-by: Tsukasa OI Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@irq.a4lg.com/ Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Cc: Palmer Dabbelt Cc: Alexandre Ghiti Cc: Olof Johansson Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Co-developed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt Signed-off-by: Jingwei Wang Link: https://lore.kernel.org/r/20250811142035.105820-1-wangjingwei@iscas.ac.cn [pjw@kernel.org: fix checkpatch issues] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/hwprobe.h | 7 ++ arch/riscv/include/asm/vdso/arch_data.h | 6 ++ arch/riscv/kernel/sys_hwprobe.c | 74 ++++++++++++++++++---- arch/riscv/kernel/unaligned_access_speed.c | 9 ++- arch/riscv/kernel/vdso/hwprobe.c | 2 +- 5 files changed, 81 insertions(+), 17 deletions(-) diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 948d2b34e94e..58f8dda73259 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -42,4 +42,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair, return pair->value == other_pair->value; } +#ifdef CONFIG_MMU +void riscv_hwprobe_register_async_probe(void); +void riscv_hwprobe_complete_async_probe(void); +#else +static inline void riscv_hwprobe_register_async_probe(void) {} +static inline void riscv_hwprobe_complete_async_probe(void) {} +#endif #endif diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h index da57a3786f7a..88b37af55175 100644 --- a/arch/riscv/include/asm/vdso/arch_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -12,6 +12,12 @@ struct vdso_arch_data { /* Boolean indicating all CPUs have the same static hwprobe values. */ __u8 homogeneous_cpus; + + /* + * A gate to check and see if the hwprobe data is actually ready, as + * probing is deferred to avoid boot slowdowns. + */ + __u8 ready; }; #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 000f4451a9d8..bc87bb9725fd 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -5,6 +5,9 @@ * more details. */ #include +#include +#include +#include #include #include #include @@ -454,28 +457,32 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs, return 0; } -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, - size_t pair_count, size_t cpusetsize, - unsigned long __user *cpus_user, - unsigned int flags) -{ - if (flags & RISCV_HWPROBE_WHICH_CPUS) - return hwprobe_get_cpus(pairs, pair_count, cpusetsize, - cpus_user, flags); - - return hwprobe_get_values(pairs, pair_count, cpusetsize, - cpus_user, flags); -} - #ifdef CONFIG_MMU -static int __init init_hwprobe_vdso_data(void) +static DECLARE_COMPLETION(boot_probes_done); +static atomic_t pending_boot_probes = ATOMIC_INIT(1); + +void riscv_hwprobe_register_async_probe(void) +{ + atomic_inc(&pending_boot_probes); +} + +void riscv_hwprobe_complete_async_probe(void) +{ + if (atomic_dec_and_test(&pending_boot_probes)) + complete(&boot_probes_done); +} + +static int complete_hwprobe_vdso_data(void) { struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; + if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) + wait_for_completion(&boot_probes_done); + /* * Initialize vDSO data with the answers for the "all CPUs" case, to * save a syscall in the common case. @@ -503,13 +510,52 @@ static int __init init_hwprobe_vdso_data(void) * vDSO should defer to the kernel for exotic cpu masks. */ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + + /* + * Make sure all the VDSO values are visible before we look at them. + * This pairs with the implicit "no speculativly visible accesses" + * barrier in the VDSO hwprobe code. + */ + smp_wmb(); + avd->ready = true; + return 0; +} + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_arch_data *avd = vdso_k_arch_data; + + /* + * Prevent the vDSO cached values from being used, as they're not ready + * yet. + */ + avd->ready = false; return 0; } arch_initcall_sync(init_hwprobe_vdso_data); +#else + +static int complete_hwprobe_vdso_data(void) { return 0; } + #endif /* CONFIG_MMU */ +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data); + + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return hwprobe_get_cpus(pairs, pair_count, cpusetsize, + cpus_user, flags); + + return hwprobe_get_values(pairs, pair_count, cpusetsize, + cpus_user, flags); +} + SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, size_t, pair_count, size_t, cpusetsize, unsigned long __user *, cpus, unsigned int, flags) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index ae2068425fbc..70b5e6927620 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); + riscv_hwprobe_complete_async_probe(); return 0; } @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } else if (!check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); + riscv_hwprobe_register_async_probe(); + if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"))) { + pr_warn("Failed to create vec_unalign_check kthread\n"); + riscv_hwprobe_complete_async_probe(); + } } /* diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 2ddeba6c68dd..8f45500d0a6e 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, * homogeneous, then this function can handle requests for arbitrary * masks. */ - if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready)) return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); /* This is something we can handle, fill out the pairs. */ From dbfdaeb381a49a7bc753d18e2876bc56a15e01cc Mon Sep 17 00:00:00 2001 From: Stuart Yoder Date: Sat, 18 Oct 2025 14:25:18 +0300 Subject: [PATCH 0140/1075] tpm_crb: Add idle support for the Arm FF-A start method According to the CRB over FF-A specification [1], a TPM that implements the ABI must comply with the TCG PTP specification. This requires support for the Idle and Ready states. This patch implements CRB control area requests for goIdle and cmdReady on FF-A based TPMs. The FF-A message used to notify the TPM of CRB updates includes a locality parameter, which provides a hint to the TPM about which locality modified the CRB. This patch adds a locality parameter to __crb_go_idle() and __crb_cmd_ready() to support this. [1] https://developer.arm.com/documentation/den0138/latest/ Signed-off-by: Stuart Yoder Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- drivers/char/tpm/tpm_crb.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index ed97344f2324..c75a531cfb98 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -133,8 +133,7 @@ static inline bool tpm_crb_has_idle(u32 start_method) { return !(start_method == ACPI_TPM2_START_METHOD || start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_START_METHOD || - start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC || - start_method == ACPI_TPM2_CRB_WITH_ARM_FFA); + start_method == ACPI_TPM2_COMMAND_BUFFER_WITH_ARM_SMC); } static bool crb_wait_for_reg_32(u32 __iomem *reg, u32 mask, u32 value, @@ -191,7 +190,7 @@ static int crb_try_pluton_doorbell(struct crb_priv *priv, bool wait_for_complete * * Return: 0 always */ -static int __crb_go_idle(struct device *dev, struct crb_priv *priv) +static int __crb_go_idle(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -200,6 +199,12 @@ static int __crb_go_idle(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_GO_IDLE, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -220,7 +225,7 @@ static int crb_go_idle(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_go_idle(dev, priv); + return __crb_go_idle(dev, priv, chip->locality); } /** @@ -238,7 +243,7 @@ static int crb_go_idle(struct tpm_chip *chip) * * Return: 0 on success -ETIME on timeout; */ -static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) +static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv, int loc) { int rc; @@ -247,6 +252,12 @@ static int __crb_cmd_ready(struct device *dev, struct crb_priv *priv) iowrite32(CRB_CTRL_REQ_CMD_READY, &priv->regs_t->ctrl_req); + if (priv->sm == ACPI_TPM2_CRB_WITH_ARM_FFA) { + rc = tpm_crb_ffa_start(CRB_FFA_START_TYPE_COMMAND, loc); + if (rc) + return rc; + } + rc = crb_try_pluton_doorbell(priv, true); if (rc) return rc; @@ -267,7 +278,7 @@ static int crb_cmd_ready(struct tpm_chip *chip) struct device *dev = &chip->dev; struct crb_priv *priv = dev_get_drvdata(dev); - return __crb_cmd_ready(dev, priv); + return __crb_cmd_ready(dev, priv, chip->locality); } static int __crb_request_locality(struct device *dev, @@ -444,7 +455,7 @@ static int crb_send(struct tpm_chip *chip, u8 *buf, size_t bufsiz, size_t len) /* Seems to be necessary for every command */ if (priv->sm == ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) - __crb_cmd_ready(&chip->dev, priv); + __crb_cmd_ready(&chip->dev, priv, chip->locality); memcpy_toio(priv->cmd, buf, len); @@ -672,7 +683,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, * PTT HW bug w/a: wake up the device to access * possibly not retained registers. */ - ret = __crb_cmd_ready(dev, priv); + ret = __crb_cmd_ready(dev, priv, 0); if (ret) goto out_relinquish_locality; @@ -744,7 +755,7 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, if (!ret) priv->cmd_size = cmd_size; - __crb_go_idle(dev, priv); + __crb_go_idle(dev, priv, 0); out_relinquish_locality: From 2dc99ea2727640b2fe12f9aa0e38ea2fc3cbb92d Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 18 Oct 2025 00:31:11 -0600 Subject: [PATCH 0141/1075] riscv: cpufeature: avoid uninitialized variable in has_thead_homogeneous_vlenb() In has_thead_homogeneous_vlenb(), smatch detected that the vlenb variable could be used while uninitialized. It appears that this could happen if no CPUs described in DT have the "thead,vlenb" property. Fix by initializing vlenb to 0, which will keep thead_vlenb_of set to 0 (as it was statically initialized). This in turn will cause riscv_v_setup_vsize() to fall back to CSR probing - the desired result if thead,vlenb isn't provided in the DT data. While here, fix a nearby comment typo. Cc: stable@vger.kernel.org Cc: Charlie Jenkins Fixes: 377be47f90e41 ("riscv: vector: Use vlenb from DT for thead") Signed-off-by: Paul Walmsley Link: https://lore.kernel.org/r/22674afb-2fe8-2a83-1818-4c37bd554579@kernel.org --- arch/riscv/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 67b59699357d..72ca768f4e91 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -932,9 +932,9 @@ static int has_thead_homogeneous_vlenb(void) { int cpu; u32 prev_vlenb = 0; - u32 vlenb; + u32 vlenb = 0; - /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return 0; From b7776a802f2f80139f96530a489dd00fd7089eda Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 18 Oct 2025 09:32:12 -0600 Subject: [PATCH 0142/1075] riscv: hwprobe: avoid uninitialized variable use in hwprobe_arch_id() Resolve this smatch warning: arch/riscv/kernel/sys_hwprobe.c:50 hwprobe_arch_id() error: uninitialized symbol 'cpu_id'. This could happen if hwprobe_arch_id() was called with a key ID of something other than MVENDORID, MIMPID, and MARCHID. This does not happen in the current codebase. The only caller of hwprobe_arch_id() is a function that only passes one of those three key IDs. For the sake of reducing static analyzer warning noise, and in the unlikely event that hwprobe_arch_id() is someday called with some other key ID, validate hwprobe_arch_id()'s input to ensure that 'cpu_id' is always initialized before use. Fixes: ea3de9ce8aa280 ("RISC-V: Add a syscall for HW probing") Cc: Evan Green Signed-off-by: Paul Walmsley Link: https://lore.kernel.org/r/cf5a13ec-19d0-9862-059b-943f36107bf3@kernel.org --- arch/riscv/kernel/sys_hwprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bc87bb9725fd..199d13f86f31 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -31,6 +31,11 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, bool first = true; int cpu; + if (pair->key != RISCV_HWPROBE_KEY_MVENDORID && + pair->key != RISCV_HWPROBE_KEY_MIMPID && + pair->key != RISCV_HWPROBE_KEY_MARCHID) + goto out; + for_each_cpu(cpu, cpus) { u64 cpu_id; @@ -61,6 +66,7 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, } } +out: pair->value = id; } From 1386d16761c0b569efedb998f56c1ae048a086e2 Mon Sep 17 00:00:00 2001 From: J-Donald Tournier Date: Sat, 18 Oct 2025 15:52:26 +0100 Subject: [PATCH 0143/1075] ALSA: hda/realtek: Add quirk for Lenovo Yoga 7 2-in-1 14AKP10 This laptop requires the same quirk as Lenovo Yoga9 14IAP7 for fixing the bass speaker problems. Use HDA_CODEC_QUIRK to match on the codec SSID to avoid conflict with the Lenovo Legion Slim 7 16IRH8, which has the same PCI SSID. Signed-off-by: J-Donald Tournier Link: https://patch.msgid.link/20251018145322.39119-1-jdournier@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 8ad5febd822a..517e2cd6ad35 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -7080,6 +7080,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD), SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), + HDA_CODEC_QUIRK(0x17aa, 0x391c, "Lenovo Yoga 7 2-in-1 14AKP10", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38b7, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2), From 7963891f7c9c6f759cc9ab7da71406b4234f3dd6 Mon Sep 17 00:00:00 2001 From: Roy Vegard Ovesen Date: Sat, 18 Oct 2025 19:18:22 +0200 Subject: [PATCH 0144/1075] ALSA: usb-audio: fix control pipe direction Since the requesttype has USB_DIR_OUT the pipe should be constructed with usb_sndctrlpipe(). Fixes: 8dc5efe3d17c ("ALSA: usb-audio: Add support for Presonus Studio 1810c") Signed-off-by: Roy Vegard Ovesen Link: https://patch.msgid.link/aPPL3tBFE_oU-JHv@ark Signed-off-by: Takashi Iwai --- sound/usb/mixer_s1810c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c index 15960d25e748..89c652434f71 100644 --- a/sound/usb/mixer_s1810c.c +++ b/sound/usb/mixer_s1810c.c @@ -178,7 +178,7 @@ snd_sc1810c_get_status_field(struct usb_device *dev, pkt_out.fields[SC1810C_STATE_F1_IDX] = SC1810C_SET_STATE_F1; pkt_out.fields[SC1810C_STATE_F2_IDX] = SC1810C_SET_STATE_F2; - ret = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), + ret = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), SC1810C_SET_STATE_REQ, SC1810C_SET_STATE_REQTYPE, (*seqnum), 0, &pkt_out, sizeof(pkt_out)); From 88de89f184661ebb946804a5abdf2bdec7f0a7ab Mon Sep 17 00:00:00 2001 From: YanLong Dai Date: Wed, 24 Sep 2025 14:14:44 +0800 Subject: [PATCH 0145/1075] RDMA/bnxt_re: Fix a potential memory leak in destroy_gsi_sqp The current error handling path in bnxt_re_destroy_gsi_sqp() could lead to a resource leak. When bnxt_qplib_destroy_qp() fails, the function jumps to the 'fail' label and returns immediately, skipping the call to bnxt_qplib_free_qp_res(). Continue the resource teardown even if bnxt_qplib_destroy_qp() fails, which aligns with the driver's general error handling strategy and prevents the potential leak. Fixes: 8dae419f9ec73 ("RDMA/bnxt_re: Refactor queue pair creation code") Signed-off-by: YanLong Dai Link: https://patch.msgid.link/20250924061444.11288-1-daiyanlong@kylinos.cn Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 4dab5ca7362b..84ce3fce2826 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -913,7 +913,7 @@ void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, spin_unlock_irqrestore(&qp->scq->cq_lock, flags); } -static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) +static void bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) { struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; @@ -933,10 +933,9 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &gsi_sqp->qplib_qp); - if (rc) { + if (rc) ibdev_err(&rdev->ibdev, "Destroy Shadow QP failed"); - goto fail; - } + bnxt_qplib_free_qp_res(&rdev->qplib_res, &gsi_sqp->qplib_qp); /* remove from active qp list */ @@ -951,10 +950,6 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) rdev->gsi_ctx.gsi_sqp = NULL; rdev->gsi_ctx.gsi_sah = NULL; rdev->gsi_ctx.sqp_tbl = NULL; - - return 0; -fail: - return rc; } static void bnxt_re_del_unique_gid(struct bnxt_re_dev *rdev) From 8d158f47f1f33d8747e80c3afbea5aa337e59d41 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 23 Sep 2025 19:08:50 +0000 Subject: [PATCH 0146/1075] RDMA/irdma: Fix SD index calculation In some cases, it is possible for pble_rsrc->next_fpm_addr to be larger than u32, so remove the u32 cast to avoid unintentional truncation. This fixes the following error that can be observed when registering massive memory regions: [ 447.227494] (NULL ib_device): cqp opcode = 0x1f maj_err_code = 0xffff min_err_code = 0x800c [ 447.227505] (NULL ib_device): [Update PE SDs Cmd Error][op_code=21] status=-5 waiting=1 completion_err=1 maj=0xffff min=0x800c Fixes: e8c4dbc2fcac ("RDMA/irdma: Add PBLE resource manager") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20250923190850.1022773-1-jmoroni@google.com Acked-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/pble.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index 3091f9345f12..fa6325adaede 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -71,7 +71,7 @@ int irdma_hmc_init_pble(struct irdma_sc_dev *dev, static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) { - idx->sd_idx = (u32)pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; + idx->sd_idx = pble_rsrc->next_fpm_addr / IRDMA_HMC_DIRECT_BP_SIZE; idx->pd_idx = (u32)(pble_rsrc->next_fpm_addr / IRDMA_HMC_PAGED_BP_SIZE); idx->rel_pd_idx = (idx->pd_idx % IRDMA_HMC_PD_CNT_IN_SD); } From 5575b7646b94c0afb0f4c0d86e00e13cf3397a62 Mon Sep 17 00:00:00 2001 From: Jacob Moroni Date: Tue, 23 Sep 2025 14:24:39 +0000 Subject: [PATCH 0147/1075] RDMA/irdma: Set irdma_cq cq_num field during CQ create The driver maintains a CQ table that is used to ensure that a CQ is still valid when processing CQ related AEs. When a CQ is destroyed, the table entry is cleared, using irdma_cq.cq_num as the index. This field was never being set, so it was just always clearing out entry 0. Additionally, the cq_num field size was increased to accommodate HW supporting more than 64K CQs. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Jacob Moroni Link: https://patch.msgid.link/20250923142439.943930-1-jmoroni@google.com Acked-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 1 + drivers/infiniband/hw/irdma/verbs.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 76ce6137f2ba..c883c9ea5a83 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2503,6 +2503,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); + iwcq->cq_num = cq_num; info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index ed21c1b56e8e..ac8b38701835 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -140,7 +140,7 @@ struct irdma_srq { struct irdma_cq { struct ib_cq ibcq; struct irdma_sc_cq sc_cq; - u16 cq_num; + u32 cq_num; bool user_mode; atomic_t armed; enum irdma_cmpl_notify last_notify; From d8713158faad0fd4418cb2f4e432c3876ad53a1f Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Fri, 10 Oct 2025 10:55:17 +0800 Subject: [PATCH 0148/1075] RDMA/uverbs: Fix umem release in UVERBS_METHOD_CQ_CREATE In `UVERBS_METHOD_CQ_CREATE`, umem should be released if anything goes wrong. Currently, if `create_cq_umem` fails, umem would not be released or referenced, causing a possible leak. In this patch, we release umem at `UVERBS_METHOD_CQ_CREATE`, the driver should not release umem if it returns an error code. Fixes: 1a40c362ae26 ("RDMA/uverbs: Add a common way to create CQ with umem") Signed-off-by: Shuhao Fu Link: https://patch.msgid.link/aOh1le4YqtYwj-hH@osx.local Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_std_types_cq.c | 1 + drivers/infiniband/hw/efa/efa_verbs.c | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index 37cd37556510..fab5d914029d 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -206,6 +206,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return ret; err_free: + ib_umem_release(umem); rdma_restrack_put(&cq->res); kfree(cq); err_event_file: diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index d9a12681f843..22d3e25c3b9d 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1216,13 +1216,13 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (umem->length < cq->size) { ibdev_dbg(&dev->ibdev, "External memory too small\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } if (!ib_umem_is_contiguous(umem)) { ibdev_dbg(&dev->ibdev, "Non contiguous CQ unsupported\n"); err = -EINVAL; - goto err_free_mem; + goto err_out; } cq->cpu_addr = NULL; @@ -1251,7 +1251,7 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, err = efa_com_create_cq(&dev->edev, ¶ms, &result); if (err) - goto err_free_mem; + goto err_free_mapped; resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; @@ -1299,12 +1299,10 @@ int efa_create_cq_umem(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); -err_free_mem: - if (umem) - ib_umem_release(umem); - else - efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); - +err_free_mapped: + if (!umem) + efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, + DMA_FROM_DEVICE); err_out: atomic64_inc(&dev->stats.create_cq_err); return err; From 77e67d5daaf155f7d0f99f4e797c4842169ec19e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Sep 2025 14:20:16 +0300 Subject: [PATCH 0149/1075] wifi: iwlwifi: fix potential use after free in iwl_mld_remove_link() This code frees "link" by calling kfree_rcu(link, rcu_head) and then it dereferences "link" to get the "link->fw_id". Save the "link->fw_id" first to avoid a potential use after free. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aNKCcKlbSkkS4_gO@stanley.mountain Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 782fc41aa1c3..960dcd208f00 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -501,6 +501,7 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; + u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -513,10 +514,10 @@ void iwl_mld_remove_link(struct iwl_mld *mld, RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, From 211ddde0823f1442e4ad052a2f30f050145ccada Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Oct 2025 15:19:16 -1000 Subject: [PATCH 0150/1075] Linux 6.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 17cfa11ca716..d14824792227 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* From 248adfe32bfd75afbcb8f6d4b68f7e0a9fb2c438 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Fri, 17 Oct 2025 17:15:28 +0100 Subject: [PATCH 0151/1075] ASoC: cs530x: Correct log message with expected variable The function used one parameter for the switch statement, but logged a different parameter when it defaulted. Signed-off-by: Simon Trimmer Signed-off-by: Vitaly Rodionov Link: https://patch.msgid.link/20251017161543.214235-2-vitalyr@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs530x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index b9eff240b929..535387cd7aa3 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -793,7 +793,7 @@ static int cs530x_set_sysclk(struct snd_soc_component *component, int clk_id, case CS530X_SYSCLK_SRC_PLL: break; default: - dev_err(component->dev, "Invalid clock id %d\n", clk_id); + dev_err(component->dev, "Invalid sysclk source: %d\n", source); return -EINVAL; } From ec20584f25233bfe292c8e18f9a429dfaff58a49 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 16 Oct 2025 10:48:44 +0100 Subject: [PATCH 0152/1075] ASoC: cs-amp-lib-test: Fix missing include of kunit/test-bug.h cs-amp-lib-test uses functions from kunit/test-bug.h but wasn't including it. This error was found by smatch. Fixes: 177862317a98 ("ASoC: cs-amp-lib: Add KUnit test for calibration helpers") Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20251016094844.92796-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs-amp-lib-test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/cs-amp-lib-test.c b/sound/soc/codecs/cs-amp-lib-test.c index 2fde84309338..3406887cdfa2 100644 --- a/sound/soc/codecs/cs-amp-lib-test.c +++ b/sound/soc/codecs/cs-amp-lib-test.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From ef30cb1304f033eaee3b46e22b8f523446db8f53 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 16 Oct 2025 15:08:37 +0000 Subject: [PATCH 0153/1075] ASoC: amd: acp: Add ACP7.0 match entries for cs35l56 and cs42l43 This adds some match entries for a few system configurations: cs42l43 link 0 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs42l43 link 1 UID 0 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 cs35l56 link 1 UID 0 cs35l56 link 1 UID 1 cs35l56 link 1 UID 2 cs35l56 link 1 UID 3 Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20251016150837.320886-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/amd/acp/amd-acp70-acpi-match.c | 157 +++++++++++++++++++++++ 1 file changed, 157 insertions(+) diff --git a/sound/soc/amd/acp/amd-acp70-acpi-match.c b/sound/soc/amd/acp/amd-acp70-acpi-match.c index dcecac792e6d..871b4f054a84 100644 --- a/sound/soc/amd/acp/amd-acp70-acpi-match.c +++ b/sound/soc/amd/acp/amd-acp70-acpi-match.c @@ -30,6 +30,20 @@ static const struct snd_soc_acpi_endpoint spk_r_endpoint = { .group_id = 1 }; +static const struct snd_soc_acpi_endpoint spk_2_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 2, + .group_id = 1 +}; + +static const struct snd_soc_acpi_endpoint spk_3_endpoint = { + .num = 0, + .aggregated = 1, + .group_position = 3, + .group_id = 1 +}; + static const struct snd_soc_acpi_adr_device rt711_rt1316_group_adr[] = { { .adr = 0x000030025D071101ull, @@ -112,6 +126,134 @@ static const struct snd_soc_acpi_adr_device rt1320_1_single_adr[] = { } }; +static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { + { /* Jack Playback Endpoint */ + .num = 0, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* DMIC Capture Endpoint */ + .num = 1, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Jack Capture Endpoint */ + .num = 2, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, + { /* Speaker Playback Endpoint */ + .num = 3, + .aggregated = 0, + .group_position = 0, + .group_id = 0, + }, +}; + +static const struct snd_soc_acpi_adr_device cs42l43_0_adr[] = { + { + .adr = 0x00003001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + } +}; + +static const struct snd_soc_acpi_adr_device cs42l43_1_cs35l56x4_1_adr[] = { + { + .adr = 0x00013001FA424301ull, + .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), + .endpoints = cs42l43_endpoints, + .name_prefix = "cs42l43" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_adr_device cs35l56x4_1_adr[] = { + { + .adr = 0x00013301FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_l_endpoint, + .name_prefix = "AMP1" + }, + { + .adr = 0x00013201FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_r_endpoint, + .name_prefix = "AMP2" + }, + { + .adr = 0x00013101FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_2_endpoint, + .name_prefix = "AMP3" + }, + { + .adr = 0x00013001FA355601ull, + .num_endpoints = 1, + .endpoints = &spk_3_endpoint, + .name_prefix = "AMP4" + }, +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l1_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs42l43_1_cs35l56x4_1_adr), + .adr_d = cs42l43_1_cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs42l43_l0_cs35l56x4_l1[] = { + { + .mask = BIT(0), + .num_adr = ARRAY_SIZE(cs42l43_0_adr), + .adr_d = cs42l43_0_adr, + }, + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + +static const struct snd_soc_acpi_link_adr acp70_cs35l56x4_l1[] = { + { + .mask = BIT(1), + .num_adr = ARRAY_SIZE(cs35l56x4_1_adr), + .adr_d = cs35l56x4_1_adr, + }, + {} +}; + static const struct snd_soc_acpi_link_adr acp70_rt722_only[] = { { .mask = BIT(0), @@ -151,6 +293,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_amd_acp70_sdw_machines[] = { .links = acp70_4_in_1_sdca, .drv_name = "amd_sdw", }, + { + .link_mask = BIT(0) | BIT(1), + .links = acp70_cs42l43_l0_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs42l43_l1_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, + { + .link_mask = BIT(1), + .links = acp70_cs35l56x4_l1, + .drv_name = "amd_sdw", + }, {}, }; EXPORT_SYMBOL(snd_soc_acpi_amd_acp70_sdw_machines); From bf6fb4a272739e0d1b2c570276324142517d1905 Mon Sep 17 00:00:00 2001 From: Sharique Mohammad Date: Thu, 16 Oct 2025 17:11:52 +0200 Subject: [PATCH 0154/1075] ASOC: max98090/91: fix for filter configuration: AHPF removed DMIC2_HPF added The filter configuration register(0x26) has AHPF(bit6) for primary record path, which is common in max98090 and max98091 and has been defined as DAPM suppy widget as "AHPF" in "struct snd_soc_dapm_widget max98090_dapm_widget[]". It is the DC-Blocking filter for the primary record path. But the same functionality for secondary record path in the configuration register(0x26) is DMIC2_HPF(bit2). It is not present as a DAPM supply widget in the current code. With this patch adding it as a DAPM supply widget. In the current code, the mics on secondary record path in code are named as "DMIC3" and "DMIC4", so accordingly naming DMIC2_HPF(bit2) as "DMIC34_HPF", and declaring it as a DAPM supply widget in "struct snd_soc_dapm_widget max98091_dapm_widget[]". Also it is specific to max98091, and should be visible or working only when max98091 codec chip is used. Therefore, written in "max98091_dapm_widget[]". As "AHPF" is not part of secondary record path, replacing it with "DMIC34_HPF" in the ALSA routes to "DMIC3" and "DMIC4" in "max98091_dapm_routes[]". Signed-off-by: Sharique Mohammad Link: https://patch.msgid.link/20251016151152.1107083-1-sharq0406@gmail.com Signed-off-by: Mark Brown --- sound/soc/codecs/max98090.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c index cb1508fc99f8..5aff5a459a43 100644 --- a/sound/soc/codecs/max98090.c +++ b/sound/soc/codecs/max98090.c @@ -1239,6 +1239,8 @@ static const struct snd_soc_dapm_widget max98091_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("DMIC4_ENA", M98090_REG_DIGITAL_MIC_ENABLE, M98090_DIGMIC4_SHIFT, 0, max98090_shdn_event, SND_SOC_DAPM_POST_PMU), + SND_SOC_DAPM_SUPPLY("DMIC34_HPF", M98090_REG_FILTER_CONFIG, + M98090_FLT_DMIC34HPF_SHIFT, 0, NULL, 0), }; static const struct snd_soc_dapm_route max98090_dapm_routes[] = { @@ -1427,8 +1429,8 @@ static const struct snd_soc_dapm_route max98091_dapm_routes[] = { /* DMIC inputs */ {"DMIC3", NULL, "DMIC3_ENA"}, {"DMIC4", NULL, "DMIC4_ENA"}, - {"DMIC3", NULL, "AHPF"}, - {"DMIC4", NULL, "AHPF"}, + {"DMIC3", NULL, "DMIC34_HPF"}, + {"DMIC4", NULL, "DMIC34_HPF"}, }; static int max98090_add_widgets(struct snd_soc_component *component) From 3bcdbc221d676f871e23da30fd485a76728f55c7 Mon Sep 17 00:00:00 2001 From: Simon Trimmer Date: Thu, 16 Oct 2025 11:26:01 +0000 Subject: [PATCH 0155/1075] ASoC: Intel: soc-acpi-intel-ptl-match: Remove cs42l43 match from sdw link3 Removing this match entry ensures that a PTL system comprising of a cs42l43 codec on link3 will use function topologies. Previously the behaviour would be use the monolithic topology associated with this codec match table entry in preference to function topologies and if the system had a number of smart amplifiers then they would not be instantiated. Signed-off-by: Simon Trimmer Link: https://patch.msgid.link/20251016112601.187020-1-simont@opensource.cirrus.com Signed-off-by: Mark Brown --- .../intel/common/soc-acpi-intel-ptl-match.c | 52 ------------------- 1 file changed, 52 deletions(-) diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 3c8b10e21ceb..4853f4f31786 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -227,33 +227,6 @@ static const struct snd_soc_acpi_endpoint cs42l43_amp_spkagg_endpoints[] = { }, }; -static const struct snd_soc_acpi_endpoint cs42l43_endpoints[] = { - { /* Jack Playback Endpoint */ - .num = 0, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* DMIC Capture Endpoint */ - .num = 1, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Jack Capture Endpoint */ - .num = 2, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, - { /* Speaker Playback Endpoint */ - .num = 3, - .aggregated = 0, - .group_position = 0, - .group_id = 0, - }, -}; - static const struct snd_soc_acpi_adr_device cs42l43_2_adr[] = { { .adr = 0x00023001fa424301ull, @@ -305,15 +278,6 @@ static const struct snd_soc_acpi_adr_device cs35l56_3_3amp_adr[] = { } }; -static const struct snd_soc_acpi_adr_device cs42l43_3_adr[] = { - { - .adr = 0x00033001FA424301ull, - .num_endpoints = ARRAY_SIZE(cs42l43_endpoints), - .endpoints = cs42l43_endpoints, - .name_prefix = "cs42l43" - } -}; - static const struct snd_soc_acpi_adr_device rt711_sdca_0_adr[] = { { .adr = 0x000030025D071101ull, @@ -486,15 +450,6 @@ static const struct snd_soc_acpi_link_adr ptl_cs42l43_l2_cs35l56x6_l13[] = { {} }; -static const struct snd_soc_acpi_link_adr ptl_cs42l43_l3[] = { - { - .mask = BIT(3), - .num_adr = ARRAY_SIZE(cs42l43_3_adr), - .adr_d = cs42l43_3_adr, - }, - {} -}; - static const struct snd_soc_acpi_link_adr ptl_rt721_l0[] = { { .mask = BIT(0), @@ -712,13 +667,6 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { .sof_tplg_filename = "sof-ptl-rt722.tplg", .get_function_tplg_files = sof_sdw_get_tplg_files, }, - { - .link_mask = BIT(3), - .links = ptl_cs42l43_l3, - .drv_name = "sof_sdw", - .sof_tplg_filename = "sof-ptl-cs42l43-l3.tplg", - .get_function_tplg_files = sof_sdw_get_tplg_files, - }, { .link_mask = BIT(3), .links = ptl_sdw_rt712_vb_l3_rt1320_l3, From fdbb53d318aa94a094434e5f226617f0eb1e8f22 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Fri, 17 Oct 2025 09:52:56 +0100 Subject: [PATCH 0156/1075] ASoC: qdsp6: q6asm: do not sleep while atomic For some reason we ended up kfree between spinlock lock and unlock, which can sleep. move the kfree out of spinlock section. Fixes: a2a5d30218fd ("ASoC: qdsp6: q6asm: Add support to memory map and unmap") Cc: Stable@vger.kernel.org Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20251017085307.4325-2-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- sound/soc/qcom/qdsp6/q6asm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c index 06a802f9dba5..67e9ca18883c 100644 --- a/sound/soc/qcom/qdsp6/q6asm.c +++ b/sound/soc/qcom/qdsp6/q6asm.c @@ -377,9 +377,9 @@ static void q6asm_audio_client_free_buf(struct audio_client *ac, spin_lock_irqsave(&ac->lock, flags); port->num_periods = 0; + spin_unlock_irqrestore(&ac->lock, flags); kfree(port->buf); port->buf = NULL; - spin_unlock_irqrestore(&ac->lock, flags); } /** From 41de7440e6a00b8e70a068c50e3fba2f56302e8a Mon Sep 17 00:00:00 2001 From: Alexis Czezar Torreno Date: Wed, 1 Oct 2025 08:37:07 +0800 Subject: [PATCH 0157/1075] hwmon: (pmbus/max34440) Update adpm12160 coeff due to latest FW adpm12160 is a dc-dc power module. The firmware was updated and the coeeficients in the pmbus_driver_info needs to be updated. Since the part has not yet released with older FW, this permanent change to reflect the latest should be ok. Signed-off-by: Alexis Czezar Torreno Link: https://lore.kernel.org/r/20251001-hwmon-next-v1-1-f8ca6a648203@analog.com Fixes: 629cf8f6c23a ("hwmon: (pmbus/max34440) Add support for ADPM12160") Cc: stable@vger.kernel.org # v6.16+ Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/max34440.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/pmbus/max34440.c b/drivers/hwmon/pmbus/max34440.c index 56834d26f8ef..ef981ed97da8 100644 --- a/drivers/hwmon/pmbus/max34440.c +++ b/drivers/hwmon/pmbus/max34440.c @@ -336,18 +336,18 @@ static struct pmbus_driver_info max34440_info[] = { .format[PSC_CURRENT_IN] = direct, .format[PSC_CURRENT_OUT] = direct, .format[PSC_TEMPERATURE] = direct, - .m[PSC_VOLTAGE_IN] = 1, + .m[PSC_VOLTAGE_IN] = 125, .b[PSC_VOLTAGE_IN] = 0, .R[PSC_VOLTAGE_IN] = 0, - .m[PSC_VOLTAGE_OUT] = 1, + .m[PSC_VOLTAGE_OUT] = 125, .b[PSC_VOLTAGE_OUT] = 0, .R[PSC_VOLTAGE_OUT] = 0, - .m[PSC_CURRENT_IN] = 1, + .m[PSC_CURRENT_IN] = 250, .b[PSC_CURRENT_IN] = 0, - .R[PSC_CURRENT_IN] = 2, - .m[PSC_CURRENT_OUT] = 1, + .R[PSC_CURRENT_IN] = -1, + .m[PSC_CURRENT_OUT] = 250, .b[PSC_CURRENT_OUT] = 0, - .R[PSC_CURRENT_OUT] = 2, + .R[PSC_CURRENT_OUT] = -1, .m[PSC_TEMPERATURE] = 1, .b[PSC_TEMPERATURE] = 0, .R[PSC_TEMPERATURE] = 2, From ab0fd09d25e1d706e1ffc240f5cf66dcc89eeb49 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:43:59 -0700 Subject: [PATCH 0158/1075] hwmon: (gpd-fan) Fix return value when platform_get_resource() fails When platform_get_resource() fails it returns NULL and not an error pointer, accordingly change the error handling. Fixes: 0ab88e239439 ("hwmon: add GPD devices sensor driver") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20251010204359.94300-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index 644dc3ca9df7..eebe39ef9677 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -615,8 +615,8 @@ static int gpd_fan_probe(struct platform_device *pdev) const struct device *hwdev; res = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (IS_ERR(res)) - return dev_err_probe(dev, PTR_ERR(res), + if (!res) + return dev_err_probe(dev, -EINVAL, "Failed to get platform resource\n"); region = devm_request_region(dev, res->start, From 72ac14851012d45dcbb9d3533e372e33001b873e Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:44:46 -0700 Subject: [PATCH 0159/1075] hwmon: (gpd-fan) Fix error handling in gpd_fan_probe() devm_request_region() returns a NULL pointer on error, not an ERR_PTR(). Handle it accordingly. Also fix error return from the call to devm_hwmon_device_register_with_info(). Fixes: 0ab88e239439 ("hwmon: add GPD devices sensor driver") Signed-off-by: Harshit Mogalapalli Reviewed-by: Cryolitia PukNgae Link: https://lore.kernel.org/r/20251010204447.94343-1-harshit.m.mogalapalli@oracle.com [groeck: Updated subject to improve readability] Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index eebe39ef9677..321794807e8d 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -621,8 +621,8 @@ static int gpd_fan_probe(struct platform_device *pdev) region = devm_request_region(dev, res->start, resource_size(res), DRIVER_NAME); - if (IS_ERR(region)) - return dev_err_probe(dev, PTR_ERR(region), + if (!region) + return dev_err_probe(dev, -EBUSY, "Failed to request region\n"); hwdev = devm_hwmon_device_register_with_info(dev, @@ -631,7 +631,7 @@ static int gpd_fan_probe(struct platform_device *pdev) &gpd_fan_chip_info, NULL); if (IS_ERR(hwdev)) - return dev_err_probe(dev, PTR_ERR(region), + return dev_err_probe(dev, PTR_ERR(hwdev), "Failed to register hwmon device\n"); return 0; From 57f6f47920ef2f598c46d0a04bd9c8984c98e6df Mon Sep 17 00:00:00 2001 From: Erick Karanja Date: Sun, 12 Oct 2025 21:12:49 +0300 Subject: [PATCH 0160/1075] hwmon: (pmbus/isl68137) Fix child node reference leak on early return In the case of an early return, the reference to the child node needs to be released. Use for_each_child_of_node_scoped to fix the issue. Fixes: 3996187f80a0e ("hwmon: (pmbus/isl68137) add support for voltage divider on Vout") Signed-off-by: Erick Karanja Link: https://lore.kernel.org/r/20251012181249.359401-1-karanja99erick@gmail.com [groeck: Updated subject/description] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/isl68137.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 52cf62e45a86..6bba9b50c51b 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -336,10 +336,9 @@ static int isl68137_probe_from_dt(struct device *dev, struct isl68137_data *data) { const struct device_node *np = dev->of_node; - struct device_node *child; int err; - for_each_child_of_node(np, child) { + for_each_child_of_node_scoped(np, child) { if (strcmp(child->name, "channel")) continue; From a09a5aa8bf258ddc99a22c30f17fe304b96b5350 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 17 Oct 2025 14:34:14 +0800 Subject: [PATCH 0161/1075] hwmon: (cgbc-hwmon) Add missing NULL check after devm_kzalloc() The driver allocates memory for sensor data using devm_kzalloc(), but did not check if the allocation succeeded. In case of memory allocation failure, dereferencing the NULL pointer would lead to a kernel crash. Add a NULL pointer check and return -ENOMEM to handle allocation failure properly. Signed-off-by: Li Qiang Fixes: 08ebc9def79fc ("hwmon: Add Congatec Board Controller monitoring driver") Reviewed-by: Thomas Richard Link: https://lore.kernel.org/r/20251017063414.1557447-1-liqiang01@kylinos.cn Signed-off-by: Guenter Roeck --- drivers/hwmon/cgbc-hwmon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/cgbc-hwmon.c b/drivers/hwmon/cgbc-hwmon.c index 772f44d56ccf..3aff4e092132 100644 --- a/drivers/hwmon/cgbc-hwmon.c +++ b/drivers/hwmon/cgbc-hwmon.c @@ -107,6 +107,9 @@ static int cgbc_hwmon_probe_sensors(struct device *dev, struct cgbc_hwmon_data * nb_sensors = data[0]; hwmon->sensors = devm_kzalloc(dev, sizeof(*hwmon->sensors) * nb_sensors, GFP_KERNEL); + if (!hwmon->sensors) + return -ENOMEM; + sensor = hwmon->sensors; for (i = 0; i < nb_sensors; i++) { From 8dcc66ad379ec0642fb281c45ccfd7d2d366e53f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 18 Oct 2025 06:04:57 -0700 Subject: [PATCH 0162/1075] hwmon: (sht3x) Fix error handling Handling of errors when reading status, temperature, and humidity returns the error number as negative attribute value. Fix it up by returning the error as return value. Fixes: a0ac418c6007c ("hwmon: (sht3x) convert some of sysfs interface to hwmon") Cc: JuenKit Yip Signed-off-by: Guenter Roeck --- drivers/hwmon/sht3x.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c index 557ad3e7752a..f36c0229328f 100644 --- a/drivers/hwmon/sht3x.c +++ b/drivers/hwmon/sht3x.c @@ -291,24 +291,26 @@ static struct sht3x_data *sht3x_update_client(struct device *dev) return data; } -static int temp1_input_read(struct device *dev) +static int temp1_input_read(struct device *dev, long *temp) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->temperature; + *temp = data->temperature; + return 0; } -static int humidity1_input_read(struct device *dev) +static int humidity1_input_read(struct device *dev, long *humidity) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->humidity; + *humidity = data->humidity; + return 0; } /* @@ -706,6 +708,7 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { enum sht3x_limits index; + int ret; switch (type) { case hwmon_chip: @@ -720,10 +723,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp: switch (attr) { case hwmon_temp_input: - *val = temp1_input_read(dev); - break; + return temp1_input_read(dev, val); case hwmon_temp_alarm: - *val = temp1_alarm_read(dev); + ret = temp1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_temp_max: index = limit_max; @@ -748,10 +753,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_humidity: switch (attr) { case hwmon_humidity_input: - *val = humidity1_input_read(dev); - break; + return humidity1_input_read(dev, val); case hwmon_humidity_alarm: - *val = humidity1_alarm_read(dev); + ret = humidity1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_humidity_max: index = limit_max; From e607ef686ab95fbcb0dfd16f49aea7918be626e1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 16 Oct 2025 12:54:21 +0200 Subject: [PATCH 0163/1075] smb: client: allocate enough space for MR WRs and ib_drain_qp() The IB_WR_REG_MR and IB_WR_LOCAL_INV operations for smbdirect_mr_io structures should never fail because the submission or completion queues are too small. So we allocate more send_wr depending on the (local) max number of MRs. While there also add additional space for ib_drain_qp(). This should make sure ib_post_send() will never fail because the submission queue is full. Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Fixes: cc55f65dd352 ("smb: client: make use of common smbdirect_socket_parameters") Cc: stable@vger.kernel.org Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 49e2df3ad1f0..b1218ea4aa8b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1767,6 +1767,7 @@ static struct smbd_connection *_smbd_get_connection( struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; @@ -1838,6 +1839,25 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } + sp->responder_resources = + min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); + log_rdma_mr(INFO, "responder_resources=%d\n", + sp->responder_resources); + + /* + * We use allocate sp->responder_resources * 2 MRs + * and each MR needs WRs for REG and INV, so + * we use '* 4'. + * + * +1 for ib_drain_qp() + */ + memset(&qp_cap, 0, sizeof(qp_cap)); + qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { rc = PTR_ERR(sc->ib.pd); @@ -1848,7 +1868,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target, IB_POLL_SOFTIRQ); + qp_cap.max_send_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; goto alloc_cq_failed; @@ -1856,7 +1876,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, IB_POLL_SOFTIRQ); + qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; goto alloc_cq_failed; @@ -1865,11 +1885,7 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = sc; - qp_attr.cap.max_send_wr = sp->send_credit_target; - qp_attr.cap.max_recv_wr = sp->recv_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - qp_attr.cap.max_inline_data = 0; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -1883,12 +1899,6 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.qp = sc->rdma.cm_id->qp; - sp->responder_resources = - min_t(u8, sp->responder_resources, - sc->ib.dev->attrs.max_qp_rd_atom); - log_rdma_mr(INFO, "responder_resources=%d\n", - sp->responder_resources); - memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = sp->initiator_depth; conn_param.responder_resources = sp->responder_resources; From 103541e6a5854b08a25e4caa61e990af1009a52e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 2 Oct 2025 08:22:35 +0000 Subject: [PATCH 0164/1075] rv: Fully convert enabled_monitors to use list_head as iterator The callbacks in enabled_monitors_seq_ops are inconsistent. Some treat the iterator as struct rv_monitor *, while others treat the iterator as struct list_head *. This causes a wrong type cast and crashes the system as reported by Nathan. Convert everything to use struct list_head * as iterator. This also makes enabled_monitors consistent with available_monitors. Fixes: de090d1ccae1 ("rv: Fix wrong type cast in enabled_monitors_next()") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/linux-trace-kernel/20250923002004.GA2836051@ax162/ Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/20251002082235.973099-1-namcao@linutronix.de Signed-off-by: Gabriele Monaco --- kernel/trace/rv/rv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 48338520376f..43e9ea473cda 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -501,7 +501,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) list_for_each_entry_continue(mon, &rv_monitors_list, list) { if (mon->enabled) - return mon; + return &mon->list; } return NULL; @@ -509,7 +509,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) { - struct rv_monitor *mon; + struct list_head *head; loff_t l; mutex_lock(&rv_interface_lock); @@ -517,15 +517,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) if (list_empty(&rv_monitors_list)) return NULL; - mon = list_entry(&rv_monitors_list, struct rv_monitor, list); + head = &rv_monitors_list; for (l = 0; l <= *pos; ) { - mon = enabled_monitors_next(m, mon, &l); - if (!mon) + head = enabled_monitors_next(m, head, &l); + if (!head) break; } - return mon; + return head; } /* From 3d62f95bd8450cebb4a4741bf83949cd54edd4a3 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 2 Oct 2025 08:23:17 +0000 Subject: [PATCH 0165/1075] rv: Make rtapp/pagefault monitor depends on CONFIG_MMU There is no page fault without MMU. Compiling the rtapp/pagefault monitor without CONFIG_MMU fails as page fault tracepoints' definitions are not available. Make rtapp/pagefault monitor depends on CONFIG_MMU. Fixes: 9162620eb604 ("rv: Add rtapp_pagefault monitor") Signed-off-by: Nam Cao Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509260455.6Z9Vkty4-lkp@intel.com/ Cc: stable@vger.kernel.org Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/20251002082317.973839-1-namcao@linutronix.de Signed-off-by: Gabriele Monaco --- kernel/trace/rv/monitors/pagefault/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig index 5e16625f1653..0e013f00c33b 100644 --- a/kernel/trace/rv/monitors/pagefault/Kconfig +++ b/kernel/trace/rv/monitors/pagefault/Kconfig @@ -5,6 +5,7 @@ config RV_MON_PAGEFAULT select RV_LTL_MONITOR depends on RV_MON_RTAPP depends on X86 || RISCV + depends on MMU default y select LTL_MON_EVENTS_ID bool "pagefault monitor" From 607844761454e3c17e928002e126ccf21c83f6aa Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 24 Sep 2025 18:30:14 +0530 Subject: [PATCH 0166/1075] wifi: mac80211: reset FILS discovery and unsol probe resp intervals When ieee80211_stop_ap() deletes the FILS discovery and unsolicited broadcast probe response templates, the associated interval values are not reset. This can lead to drivers subsequently operating with the non-zero values, leading to unexpected behavior. Trigger repeated retrieval attempts of the FILS discovery template in ath12k, resulting in excessive log messages such as: mac vdev 0 failed to retrieve FILS discovery template mac vdev 4 failed to retrieve FILS discovery template Fix this by resetting the intervals in ieee80211_stop_ap() to ensure proper cleanup of FILS discovery and unsolicited broadcast probe response templates. Fixes: 295b02c4be74 ("mac80211: Add FILS discovery support") Fixes: 632189a0180f ("mac80211: Unsolicited broadcast probe response support") Signed-off-by: Aloka Dixit Signed-off-by: Aaradhana Sahu Link: https://patch.msgid.link/20250924130014.2575533-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9aca1c3c097..c52b0456039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1876,6 +1876,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; + link_conf->fils_discovery.min_interval = 0; + link_conf->fils_discovery.max_interval = 0; + link_conf->unsol_bcast_probe_resp_interval = 0; __sta_info_flush(sdata, true, link_id, NULL); From a2a69add80411dd295c9088c1bcf925b1f4e53d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 3 Oct 2025 14:51:26 +0200 Subject: [PATCH 0167/1075] bcma: don't register devices disabled in OF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some bus devices can be marked as disabled for specific SoCs or models. Those should not be registered to avoid probing them. Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251003125126.27950-1-zajec5@gmail.com Signed-off-by: Johannes Berg --- drivers/bcma/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 6ecfc821cf83..72f045e6ed51 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -294,6 +294,8 @@ static int bcma_register_devices(struct bcma_bus *bus) int err; list_for_each_entry(core, &bus->cores, list) { + struct device_node *np; + /* We support that core ourselves */ switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: @@ -311,6 +313,10 @@ static int bcma_register_devices(struct bcma_bus *bus) if (bcma_is_core_needed_early(core->id.id)) continue; + np = core->dev.of_node; + if (np && !of_device_is_available(np)) + continue; + /* Only first GMAC core on BCM4706 is connected and working */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && core->core_unit > 0) From 1e1801cab6c7f302baec2a0fe3afe25458d0be7e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 11 Oct 2025 00:57:35 +0100 Subject: [PATCH 0168/1075] MAINTAINERS: wcn36xx: Add linux-wireless list The wcn36xx is a wireless device but doesn't have the wireless list in its MAINTAINERS entry. Add it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Jeff Johnson Link: https://patch.msgid.link/20251010235735.350638-1-linux@treblig.org Signed-off-by: Johannes Berg --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..91bf59792060 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21317,6 +21317,7 @@ F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER M: Loic Poulain L: wcn36xx@lists.infradead.org +L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx F: drivers/net/wireless/ath/wcn36xx/ From 3776c685ebe5f43e9060af06872661de55e80b9a Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Mon, 13 Oct 2025 15:58:19 +0530 Subject: [PATCH 0169/1075] wifi: brcmfmac: fix crash while sending Action Frames in standalone AP Mode Currently, whenever there is a need to transmit an Action frame, the brcmfmac driver always uses the P2P vif to send the "actframe" IOVAR to firmware. The P2P interfaces were available when wpa_supplicant is managing the wlan interface. However, the P2P interfaces are not created/initialized when only hostapd is managing the wlan interface. And if hostapd receives an ANQP Query REQ Action frame even from an un-associated STA, the brcmfmac driver tries to use an uninitialized P2P vif pointer for sending the IOVAR to firmware. This NULL pointer dereferencing triggers a driver crash. [ 1417.074538] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [...] [ 1417.075188] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) [...] [ 1417.075653] Call trace: [ 1417.075662] brcmf_p2p_send_action_frame+0x23c/0xc58 [brcmfmac] [ 1417.075738] brcmf_cfg80211_mgmt_tx+0x304/0x5c0 [brcmfmac] [ 1417.075810] cfg80211_mlme_mgmt_tx+0x1b0/0x428 [cfg80211] [ 1417.076067] nl80211_tx_mgmt+0x238/0x388 [cfg80211] [ 1417.076281] genl_family_rcv_msg_doit+0xe0/0x158 [ 1417.076302] genl_rcv_msg+0x220/0x2a0 [ 1417.076317] netlink_rcv_skb+0x68/0x140 [ 1417.076330] genl_rcv+0x40/0x60 [ 1417.076343] netlink_unicast+0x330/0x3b8 [ 1417.076357] netlink_sendmsg+0x19c/0x3f8 [ 1417.076370] __sock_sendmsg+0x64/0xc0 [ 1417.076391] ____sys_sendmsg+0x268/0x2a0 [ 1417.076408] ___sys_sendmsg+0xb8/0x118 [ 1417.076427] __sys_sendmsg+0x90/0xf8 [ 1417.076445] __arm64_sys_sendmsg+0x2c/0x40 [ 1417.076465] invoke_syscall+0x50/0x120 [ 1417.076486] el0_svc_common.constprop.0+0x48/0xf0 [ 1417.076506] do_el0_svc+0x24/0x38 [ 1417.076525] el0_svc+0x30/0x100 [ 1417.076548] el0t_64_sync_handler+0x100/0x130 [ 1417.076569] el0t_64_sync+0x190/0x198 [ 1417.076589] Code: f9401e80 aa1603e2 f9403be1 5280e483 (f9400000) Fix this, by always using the vif corresponding to the wdev on which the Action frame Transmission request was initiated by the userspace. This way, even if P2P vif is not available, the IOVAR is sent to firmware on AP vif and the ANQP Query RESP Action frame is transmitted without crashing the driver. Move init_completion() for "send_af_done" from brcmf_p2p_create_p2pdev() to brcmf_p2p_attach(). Because the former function would not get executed when only hostapd is managing wlan interface, and it is not safe to do reinit_completion() later in brcmf_p2p_tx_action_frame(), without any prior init_completion(). And in the brcmf_p2p_tx_action_frame() function, the condition check for P2P Presence response frame is not needed, since the wpa_supplicant is properly sending the P2P Presense Response frame on the P2P-GO vif instead of the P2P-Device vif. Cc: stable@vger.kernel.org Fixes: 18e2f61db3b7 ("brcmfmac: P2P action frame tx") Signed-off-by: Gokul Sivakumar Acked-by: Arend van Spriel Link: https://patch.msgid.link/20251013102819.9727-1-gokulkumar.sivakumar@infineon.com [Cc stable] Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 3 +- .../broadcom/brcm80211/brcmfmac/p2p.c | 28 +++++++------------ .../broadcom/brcm80211/brcmfmac/p2p.h | 3 +- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 8afaffe31031..bb96b87b2a6e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5627,8 +5627,7 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, *cookie, le16_to_cpu(action_frame->len), le32_to_cpu(af_params->channel)); - ack = brcmf_p2p_send_action_frame(cfg, cfg_to_ndev(cfg), - af_params); + ack = brcmf_p2p_send_action_frame(vif->ifp, af_params); cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, ack, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 0dc9d28cd77b..e1752a513c73 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1529,6 +1529,7 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, /** * brcmf_p2p_tx_action_frame() - send action frame over fil. * + * @ifp: interface to transmit on. * @p2p: p2p info struct for vif. * @af_params: action frame data/info. * @@ -1538,12 +1539,11 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, * The WLC_E_ACTION_FRAME_COMPLETE event will be received when the action * frame is transmitted. */ -static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, +static s32 brcmf_p2p_tx_action_frame(struct brcmf_if *ifp, + struct brcmf_p2p_info *p2p, struct brcmf_fil_af_params_le *af_params) { struct brcmf_pub *drvr = p2p->cfg->pub; - struct brcmf_cfg80211_vif *vif; - struct brcmf_p2p_action_frame *p2p_af; s32 err = 0; brcmf_dbg(TRACE, "Enter\n"); @@ -1552,14 +1552,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); - /* check if it is a p2p_presence response */ - p2p_af = (struct brcmf_p2p_action_frame *)af_params->action_frame.data; - if (p2p_af->subtype == P2P_AF_PRESENCE_RSP) - vif = p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif; - else - vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif; - - err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe", af_params, + err = brcmf_fil_bsscfg_data_set(ifp, "actframe", af_params, sizeof(*af_params)); if (err) { bphy_err(drvr, " sending action frame has failed\n"); @@ -1711,16 +1704,14 @@ static bool brcmf_p2p_check_dwell_overflow(u32 requested_dwell, /** * brcmf_p2p_send_action_frame() - send action frame . * - * @cfg: driver private data for cfg80211 interface. - * @ndev: net device to transmit on. + * @ifp: interface to transmit on. * @af_params: configuration data for action frame. */ -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params) { + struct brcmf_cfg80211_info *cfg = ifp->drvr->config; struct brcmf_p2p_info *p2p = &cfg->p2p; - struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_fil_action_frame_le *action_frame; struct brcmf_config_af_params config_af_params; struct afx_hdl *afx_hdl = &p2p->afx_hdl; @@ -1857,7 +1848,7 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, if (af_params->channel) msleep(P2P_AF_RETRY_DELAY_TIME); - ack = !brcmf_p2p_tx_action_frame(p2p, af_params); + ack = !brcmf_p2p_tx_action_frame(ifp, p2p, af_params); tx_retry++; dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell, dwell_jiffies); @@ -2217,7 +2208,6 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, WARN_ON(p2p_ifp->bsscfgidx != bsscfgidx); - init_completion(&p2p->send_af_done); INIT_WORK(&p2p->afx_hdl.afx_work, brcmf_p2p_afx_handler); init_completion(&p2p->afx_hdl.act_frm_scan); init_completion(&p2p->wait_next_af); @@ -2513,6 +2503,8 @@ s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced) pri_ifp = brcmf_get_ifp(cfg->pub, 0); p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif = pri_ifp->vif; + init_completion(&p2p->send_af_done); + if (p2pdev_forced) { err_ptr = brcmf_p2p_create_p2pdev(p2p, NULL, NULL); if (IS_ERR(err_ptr)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index d2ecee565bf2..d3137ebd7158 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -168,8 +168,7 @@ int brcmf_p2p_notify_action_frame_rx(struct brcmf_if *ifp, int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, const struct brcmf_event_msg *e, void *data); -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params); bool brcmf_p2p_scan_finding_common_channel(struct brcmf_cfg80211_info *cfg, struct brcmf_bss_info_le *bi); From ed6a47346ec69e7f1659e0a1a3558293f60d5dd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:54:27 +0300 Subject: [PATCH 0170/1075] wifi: mac80211: fix key tailroom accounting leak For keys added by ieee80211_gtk_rekey_add(), we assume that they're already present in the hardware and set the flag KEY_FLAG_UPLOADED_TO_HARDWARE. However, setting this flag needs to be paired with decrementing the tailroom needed, which was missed. Fixes: f52a0b408ed1 ("wifi: mac80211: mark keys as uploaded when added by the driver") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019115358.c88eafb4083e.I69e9d4d78a756a133668c55b5570cf15a4b0e6a4@changeid Signed-off-by: Johannes Berg --- net/mac80211/key.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b14e9cd9713f..d5da7ccea66e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -508,11 +508,16 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) + if (!new->local->wowlan) { ret = ieee80211_key_enable_hw_accel(new); - else if (link_id < 0 || !sdata->vif.active_links || - BIT(link_id) & sdata->vif.active_links) + } else if (link_id < 0 || !sdata->vif.active_links || + BIT(link_id) & sdata->vif.active_links) { new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + if (!(new->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + decrease_tailroom_need_count(sdata, 1); + } } if (ret) From 249e1443e3d57e059925bdb698f53e4d008fc106 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Oct 2025 10:57:45 +0300 Subject: [PATCH 0171/1075] wifi: nl80211: call kfree without a NULL check Coverity is unhappy because we may leak old_radio_rts_threshold. Since this pointer is only valid in the context of the function and kfree is NULL pointer safe, don't check and just call kfree. Note that somehow, we were checking old_rts_threshold to free old_radio_rts_threshold which is a bit odd. Fixes: 264637941cf4 ("wifi: cfg80211: Add Support to Set RTS Threshold for each Radio") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20251020075745.44168-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 346dfd2bd987..03d07b54359a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4136,8 +4136,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_quantum = old_txq_quantum; } - if (old_rts_threshold) - kfree(old_radio_rts_threshold); + kfree(old_radio_rts_threshold); return result; } From 71c07570b918f000de5d0f7f1bf17a2887e303b5 Mon Sep 17 00:00:00 2001 From: Renjun Wang Date: Sun, 19 Oct 2025 18:44:38 +0800 Subject: [PATCH 0172/1075] USB: serial: option: add UNISOC UIS7720 Add support for UNISOC (Spreadtrum) UIS7720 (A7720) module. T: Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4064 Rev=04.04 S: Manufacturer=Unisoc-phone S: Product=Unisoc-phone S: SerialNumber=0123456789ABCDEF C: #Ifs= 9 Cfg#= 1 Atr=c0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0&1: RNDIS, 2: LOG, 3: DIAG, 4&5: AT Ports, 6&7: AT2 Ports, 8: ADB Signed-off-by: Renjun Wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 62e984d20e59..ed1328648a73 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -617,6 +617,7 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +#define UNISOC_PRODUCT_UIS7720 0x4064 /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 @@ -2466,6 +2467,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, UNISOC_PRODUCT_UIS7720, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, From e46ee2f07e5848d7ec7aec38b72476dc7941b048 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:13 +0200 Subject: [PATCH 0173/1075] spi: intel: Add support for 128M component density With the recent hardware the flash component density can be increased to 128M. Update the driver to support this. While there log a warning if we encounter an unsupported value in this field. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c index 13bbb2133507..1775ad39e633 100644 --- a/drivers/spi/spi-intel.c +++ b/drivers/spi/spi-intel.c @@ -132,6 +132,7 @@ #define FLCOMP_C0DEN_16M 0x05 #define FLCOMP_C0DEN_32M 0x06 #define FLCOMP_C0DEN_64M 0x07 +#define FLCOMP_C0DEN_128M 0x08 #define INTEL_SPI_TIMEOUT 5000 /* ms */ #define INTEL_SPI_FIFO_SZ 64 @@ -1347,7 +1348,12 @@ static int intel_spi_read_desc(struct intel_spi *ispi) case FLCOMP_C0DEN_64M: ispi->chip0_size = SZ_64M; break; + case FLCOMP_C0DEN_128M: + ispi->chip0_size = SZ_128M; + break; default: + dev_warn(ispi->dev, "unsupported C0DEN: %#lx\n", + flcomp & FLCOMP_C0DEN_MASK); return -EINVAL; } From bc25c6e0a4880b5ad70c31fe1466f30c9e4c8f52 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:14 +0200 Subject: [PATCH 0174/1075] spi: intel-pci: Add support for Arrow Lake-H SPI serial flash Add Intel Arrow Lake-H PCI ID to the driver list of supported devices. This is the same controller found in previous generations. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 4b63cb98df9c..49b4d3061197 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -79,6 +79,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7723), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, From f7e37affbc9085f2b77ccb6596521a44eabf7505 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:15 +0200 Subject: [PATCH 0175/1075] spi: intel-pci: Add support for Intel Wildcat Lake SPI serial flash Add Intel Wildcat Lake SPI serial flash PCI ID to the list of supported devices. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-4-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 49b4d3061197..7765fb27c37c 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -75,6 +75,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x4d23), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, From 3a20c444cd123e820e10ae22eeaf00e189315aa1 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 17 Oct 2025 16:28:49 +0200 Subject: [PATCH 0176/1075] can: bxcan: bxcan_start_xmit(): use can_dev_dropped_skb() instead of can_dropped_invalid_skb() In addition to can_dropped_invalid_skb(), the helper function can_dev_dropped_skb() checks whether the device is in listen-only mode and discards the skb accordingly. Replace can_dropped_invalid_skb() by can_dev_dropped_skb() to also drop skbs in for listen-only mode. Reported-by: Marc Kleine-Budde Closes: https://lore.kernel.org/all/20251017-bizarre-enchanted-quokka-f3c704-mkl@pengutronix.de/ Fixes: f00647d8127b ("can: bxcan: add support for ST bxCAN controller") Link: https://patch.msgid.link/20251017-fix-skb-drop-check-v1-1-556665793fa4@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/bxcan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/bxcan.c b/drivers/net/can/bxcan.c index bfc60eb33dc3..333ad42ea73b 100644 --- a/drivers/net/can/bxcan.c +++ b/drivers/net/can/bxcan.c @@ -842,7 +842,7 @@ static netdev_tx_t bxcan_start_xmit(struct sk_buff *skb, u32 id; int i, j; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (bxcan_tx_busy(priv)) From 0bee15a5caf36fe513fdeee07fd4f0331e61c064 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 17 Oct 2025 16:28:49 +0200 Subject: [PATCH 0177/1075] can: esd: acc_start_xmit(): use can_dev_dropped_skb() instead of can_dropped_invalid_skb() In addition to can_dropped_invalid_skb(), the helper function can_dev_dropped_skb() checks whether the device is in listen-only mode and discards the skb accordingly. Replace can_dropped_invalid_skb() by can_dev_dropped_skb() to also drop skbs in for listen-only mode. Reported-by: Marc Kleine-Budde Closes: https://lore.kernel.org/all/20251017-bizarre-enchanted-quokka-f3c704-mkl@pengutronix.de/ Fixes: 9721866f07e1 ("can: esd: add support for esd GmbH PCIe/402 CAN interface family") Link: https://patch.msgid.link/20251017-fix-skb-drop-check-v1-2-556665793fa4@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/esd/esdacc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/esd/esdacc.c b/drivers/net/can/esd/esdacc.c index c80032bc1a52..73e66f9a3781 100644 --- a/drivers/net/can/esd/esdacc.c +++ b/drivers/net/can/esd/esdacc.c @@ -254,7 +254,7 @@ netdev_tx_t acc_start_xmit(struct sk_buff *skb, struct net_device *netdev) u32 acc_id; u32 acc_dlc; - if (can_dropped_invalid_skb(netdev, skb)) + if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; /* Access core->tx_fifo_tail only once because it may be changed From 3a3bc9bbb3a0287164a595787df0c70d91e77cfd Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 17 Oct 2025 16:28:49 +0200 Subject: [PATCH 0178/1075] can: rockchip-canfd: rkcanfd_start_xmit(): use can_dev_dropped_skb() instead of can_dropped_invalid_skb() In addition to can_dropped_invalid_skb(), the helper function can_dev_dropped_skb() checks whether the device is in listen-only mode and discards the skb accordingly. Replace can_dropped_invalid_skb() by can_dev_dropped_skb() to also drop skbs in for listen-only mode. Reported-by: Marc Kleine-Budde Closes: https://lore.kernel.org/all/20251017-bizarre-enchanted-quokka-f3c704-mkl@pengutronix.de/ Fixes: ff60bfbaf67f ("can: rockchip_canfd: add driver for Rockchip CAN-FD controller") Link: https://patch.msgid.link/20251017-fix-skb-drop-check-v1-3-556665793fa4@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/rockchip/rockchip_canfd-tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/rockchip/rockchip_canfd-tx.c b/drivers/net/can/rockchip/rockchip_canfd-tx.c index 865a15e033a9..12200dcfd338 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-tx.c +++ b/drivers/net/can/rockchip/rockchip_canfd-tx.c @@ -72,7 +72,7 @@ netdev_tx_t rkcanfd_start_xmit(struct sk_buff *skb, struct net_device *ndev) int err; u8 i; - if (can_dropped_invalid_skb(ndev, skb)) + if (can_dev_dropped_skb(ndev, skb)) return NETDEV_TX_OK; if (!netif_subqueue_maybe_stop(priv->ndev, 0, From 8e93ac51e4c6dc399fad59ec21f55f2cfb46d27c Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Mon, 20 Oct 2025 11:51:03 +0200 Subject: [PATCH 0179/1075] can: netlink: can_changelink(): allow disabling of automatic restart Since the commit c1f3f9797c1f ("can: netlink: can_changelink(): fix NULL pointer deref of struct can_priv::do_set_mode"), the automatic restart delay can only be set for devices that implement the restart handler struct can_priv::do_set_mode. As it makes no sense to configure a automatic restart for devices that doesn't support it. However, since systemd commit 13ce5d4632e3 ("network/can: properly handle CAN.RestartSec=0") [1], systemd-networkd correctly handles a restart delay of "0" (i.e. the restart is disabled). Which means that a disabled restart is always configured in the kernel. On systems with both changes active this causes that CAN interfaces that don't implement a restart handler cannot be brought up by systemd-networkd. Solve this problem by allowing a delay of "0" to be configured, even if the device does not implement a restart handler. [1] https://github.com/systemd/systemd/commit/13ce5d4632e395521e6205c954493c7fc1c4c6e0 Cc: stable@vger.kernel.org Cc: Andrei Lalaev Reported-by: Marc Kleine-Budde Closes: https://lore.kernel.org/all/20251020-certain-arrogant-vole-of-sunshine-141841-mkl@pengutronix.de Fixes: c1f3f9797c1f ("can: netlink: can_changelink(): fix NULL pointer deref of struct can_priv::do_set_mode") Link: https://patch.msgid.link/20251020-netlink-fix-restart-v1-1-3f53c7f8520b@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/netlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 0591406b6f32..6f83b87d54fc 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -452,7 +452,9 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], } if (data[IFLA_CAN_RESTART_MS]) { - if (!priv->do_set_mode) { + unsigned int restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + + if (restart_ms != 0 && !priv->do_set_mode) { NL_SET_ERR_MSG(extack, "Device doesn't support restart from Bus Off"); return -EOPNOTSUPP; @@ -461,7 +463,7 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], /* Do not allow changing restart delay while running */ if (dev->flags & IFF_UP) return -EBUSY; - priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); + priv->restart_ms = restart_ms; } if (data[IFLA_CAN_RESTART]) { From 4ec703ec0c384a2199808c4eb2e9037236285a8d Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sat, 18 Oct 2025 12:32:54 -0700 Subject: [PATCH 0180/1075] io_uring: fix incorrect unlikely() usage in io_waitid_prep() The negation operator is incorrectly placed outside the unlikely() macro: if (!unlikely(iwa)) This inverts the compiler branch prediction hint, marking the NULL case as likely instead of unlikely. The intent is to indicate that allocation failures are rare, consistent with common kernel patterns. Moving the negation inside unlikely(): if (unlikely(!iwa)) Fixes: 2b4fc4cd43f2 ("io_uring/waitid: setup async data in the prep handler") Signed-off-by: Alok Tiwari Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- io_uring/waitid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index f25110fb1b12..53532ae6256c 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; iwa = io_uring_alloc_async_data(NULL, req); - if (!unlikely(iwa)) + if (unlikely(!iwa)) return -ENOMEM; iwa->req = req; From 81ccca31214e11ea2b537fd35d4f66d7cf46268e Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 10 Oct 2025 10:09:00 +0200 Subject: [PATCH 0181/1075] nbd: override creds to kernel when calling sock_{send,recv}msg() sock_{send,recv}msg() internally calls security_socket_{send,recv}msg(), which does security checks (e.g. SELinux) for socket access against the current task. However, _sock_xmit() in drivers/block/nbd.c may be called indirectly from a userspace syscall, where the NBD socket access would be incorrectly checked against the calling userspace task (which simply tries to read/write a file that happens to reside on an NBD device). To fix this, temporarily override creds to kernel ones before calling the sock_*() functions. This allows the security modules to recognize this as internal access by the kernel, which will normally be allowed. A way to trigger the issue is to do the following (on a system with SELinux set to enforcing): ### Create nbd device: truncate -s 256M /tmp/testfile nbd-server localhost:10809 /tmp/testfile ### Connect to the nbd server: nbd-client localhost ### Create mdraid array mdadm --create -l 1 -n 2 /dev/md/testarray /dev/nbd0 missing After these steps, assuming the SELinux policy doesn't allow the unexpected access pattern, errors will be visible on the kernel console: [ 142.204243] nbd0: detected capacity change from 0 to 524288 [ 165.189967] md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+ [ 165.252299] md/raid1:md127: active with 1 out of 2 mirrors [ 165.252725] md127: detected capacity change from 0 to 522240 [ 165.255434] block nbd0: Send control failed (result -13) [ 165.255718] block nbd0: Request send failed, requeueing [ 165.256006] block nbd0: Dead connection, failed to find a fallback [ 165.256041] block nbd0: Receive control failed (result -32) [ 165.256423] block nbd0: shutting down sockets [ 165.257196] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.257736] Buffer I/O error on dev md127, logical block 0, async page read [ 165.258263] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.259376] Buffer I/O error on dev md127, logical block 0, async page read [ 165.259920] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.260628] Buffer I/O error on dev md127, logical block 0, async page read [ 165.261661] ldm_validate_partition_table(): Disk read failed. [ 165.262108] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.262769] Buffer I/O error on dev md127, logical block 0, async page read [ 165.263697] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.264412] Buffer I/O error on dev md127, logical block 0, async page read [ 165.265412] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.265872] Buffer I/O error on dev md127, logical block 0, async page read [ 165.266378] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.267168] Buffer I/O error on dev md127, logical block 0, async page read [ 165.267564] md127: unable to read partition table [ 165.269581] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.269960] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.270316] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.270913] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.271253] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.271809] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.272074] ldm_validate_partition_table(): Disk read failed. [ 165.272360] nbd0: unable to read partition table [ 165.289004] ldm_validate_partition_table(): Disk read failed. [ 165.289614] nbd0: unable to read partition table The corresponding SELinux denial on Fedora/RHEL will look like this (assuming it's not silenced): type=AVC msg=audit(1758104872.510:116): avc: denied { write } for pid=1908 comm="mdadm" laddr=::1 lport=32772 faddr=::1 fport=10809 scontext=system_u:system_r:mdadm_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=tcp_socket permissive=0 The respective backtrace looks like this: @security[mdadm, -13, handshake_exit+221615650 handshake_exit+221615650 handshake_exit+221616465 security_socket_sendmsg+5 sock_sendmsg+106 handshake_exit+221616150 sock_sendmsg+5 __sock_xmit+162 nbd_send_cmd+597 nbd_handle_cmd+377 nbd_queue_rq+63 blk_mq_dispatch_rq_list+653 __blk_mq_do_dispatch_sched+184 __blk_mq_sched_dispatch_requests+333 blk_mq_sched_dispatch_requests+38 blk_mq_run_hw_queue+239 blk_mq_dispatch_plug_list+382 blk_mq_flush_plug_list.part.0+55 __blk_flush_plug+241 __submit_bio+353 submit_bio_noacct_nocheck+364 submit_bio_wait+84 __blkdev_direct_IO_simple+232 blkdev_read_iter+162 vfs_read+591 ksys_read+95 do_syscall_64+92 entry_SYSCALL_64_after_hwframe+120 ]: 1 The issue has started to appear since commit 060406c61c7c ("block: add plug while submitting IO"). Cc: Ming Lei Link: https://bugzilla.redhat.com/show_bug.cgi?id=2348878 Fixes: 060406c61c7c ("block: add plug while submitting IO") Signed-off-by: Ondrej Mosnacek Acked-by: Paul Moore Acked-by: Stephen Smalley Reviewed-by: Ming Lei Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1188f32a5e5e..a853c65ac65d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -52,6 +52,7 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static struct workqueue_struct *nbd_del_wq; +static struct cred *nbd_cred; static int nbd_total_devices = 0; struct nbd_sock { @@ -554,6 +555,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, int result; struct msghdr msg = {} ; unsigned int noreclaim_flag; + const struct cred *old_cred; if (unlikely(!sock)) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -562,6 +564,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, return -EINVAL; } + old_cred = override_creds(nbd_cred); + msg.msg_iter = *iter; noreclaim_flag = memalloc_noreclaim_save(); @@ -586,6 +590,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, memalloc_noreclaim_restore(noreclaim_flag); + revert_creds(old_cred); + return result; } @@ -2677,7 +2683,15 @@ static int __init nbd_init(void) return -ENOMEM; } + nbd_cred = prepare_kernel_cred(&init_task); + if (!nbd_cred) { + destroy_workqueue(nbd_del_wq); + unregister_blkdev(NBD_MAJOR, "nbd"); + return -ENOMEM; + } + if (genl_register_family(&nbd_genl_family)) { + put_cred(nbd_cred); destroy_workqueue(nbd_del_wq); unregister_blkdev(NBD_MAJOR, "nbd"); return -EINVAL; @@ -2732,6 +2746,7 @@ static void __exit nbd_cleanup(void) /* Also wait for nbd_dev_remove_work() completes */ destroy_workqueue(nbd_del_wq); + put_cred(nbd_cred); idr_destroy(&nbd_index_idr); unregister_blkdev(NBD_MAJOR, "nbd"); } From a1978b692a3953241842a89eaa0026158f306cf1 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Fri, 17 Oct 2025 17:10:53 +0530 Subject: [PATCH 0182/1075] PCI: dwc: Use custom pci_ops for root bus DBI vs ECAM config access When the vendor configuration space is 256MB aligned, the DesignWare PCIe host driver enables ECAM access and sets the DBI base to the start of the config space. This causes vendor drivers to incorrectly program iATU regions, as they rely on the DBI address for internal accesses. To fix this, avoid overwriting the DBI base when ECAM is enabled. Instead, introduce a custom pci_ops that accesses the DBI region directly for the root bus and uses ECAM for other buses. Fixes: f6fd357f7afb ("PCI: dwc: Prepare the driver for enabling ECAM mechanism using iATU 'CFG Shift Feature'") Reported-by: Ron Economos Closes: https://lore.kernel.org/all/eac81c57-1164-4d74-a1b4-6f353c577731@w6rz.net/ Suggested-by: Manivannan Sadhasivam Signed-off-by: Krishna Chaitanya Chundru [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Ron Economos Link: https://patch.msgid.link/20251017-ecam_fix-v1-1-f6faa3d0edf3@oss.qualcomm.com --- .../pci/controller/dwc/pcie-designware-host.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 20c9333bcb1c..e92513c5bda5 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -23,6 +23,7 @@ #include "pcie-designware.h" static struct pci_ops dw_pcie_ops; +static struct pci_ops dw_pcie_ecam_ops; static struct pci_ops dw_child_pcie_ops; #define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ @@ -471,9 +472,6 @@ static int dw_pcie_create_ecam_window(struct dw_pcie_rp *pp, struct resource *re if (IS_ERR(pp->cfg)) return PTR_ERR(pp->cfg); - pci->dbi_base = pp->cfg->win; - pci->dbi_phys_addr = res->start; - return 0; } @@ -529,7 +527,7 @@ static int dw_pcie_host_get_resources(struct dw_pcie_rp *pp) if (ret) return ret; - pp->bridge->ops = (struct pci_ops *)&pci_generic_ecam_ops.pci_ops; + pp->bridge->ops = &dw_pcie_ecam_ops; pp->bridge->sysdata = pp->cfg; pp->cfg->priv = pp; } else { @@ -842,12 +840,34 @@ void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus, unsigned int devfn, } EXPORT_SYMBOL_GPL(dw_pcie_own_conf_map_bus); +static void __iomem *dw_pcie_ecam_conf_map_bus(struct pci_bus *bus, unsigned int devfn, int where) +{ + struct pci_config_window *cfg = bus->sysdata; + struct dw_pcie_rp *pp = cfg->priv; + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + unsigned int busn = bus->number; + + if (busn > 0) + return pci_ecam_map_bus(bus, devfn, where); + + if (PCI_SLOT(devfn) > 0) + return NULL; + + return pci->dbi_base + where; +} + static struct pci_ops dw_pcie_ops = { .map_bus = dw_pcie_own_conf_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, }; +static struct pci_ops dw_pcie_ecam_ops = { + .map_bus = dw_pcie_ecam_conf_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); From fc2bc2623e3a099165b02d13567d21fabb5ea54d Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Fri, 17 Oct 2025 17:10:54 +0530 Subject: [PATCH 0183/1075] Revert "PCI: qcom: Prepare for the DWC ECAM enablement" This reverts commit 4660e50cf81800f82eeecf743ad1e3e97ab72190. Commit f6fd357f7afb ("PCI: dwc: Prepare the driver for enabling ECAM mechanism using iATU 'CFG Shift Feature'") enabled ECAM access by using the config space start as DBI address. However, this approach breaks vendor drivers that rely on the DBI address for internal accesses, especially when the vendor config space is 256MB aligned. To resolve this, avoid using the DBI as the start of config space and instead introduce a custom ECAM PCI ops implementation. Revert the qcom specific ECAM preparation logic in 4660e50cf818 ("PCI: qcom: Prepare for the DWC ECAM enablement") since it's no longer necessary. Signed-off-by: Krishna Chaitanya Chundru [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251017-ecam_fix-v1-2-f6faa3d0edf3@oss.qualcomm.com --- drivers/pci/controller/dwc/pcie-qcom.c | 68 -------------------------- 1 file changed, 68 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 805edbbfe7eb..6948824642dc 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -55,7 +55,6 @@ #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 #define PARF_Q2A_FLUSH 0x1ac #define PARF_LTSSM 0x1b0 -#define PARF_SLV_DBI_ELBI 0x1b4 #define PARF_INT_ALL_STATUS 0x224 #define PARF_INT_ALL_CLEAR 0x228 #define PARF_INT_ALL_MASK 0x22c @@ -65,16 +64,6 @@ #define PARF_DBI_BASE_ADDR_V2_HI 0x354 #define PARF_SLV_ADDR_SPACE_SIZE_V2 0x358 #define PARF_SLV_ADDR_SPACE_SIZE_V2_HI 0x35c -#define PARF_BLOCK_SLV_AXI_WR_BASE 0x360 -#define PARF_BLOCK_SLV_AXI_WR_BASE_HI 0x364 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT 0x368 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT_HI 0x36c -#define PARF_BLOCK_SLV_AXI_RD_BASE 0x370 -#define PARF_BLOCK_SLV_AXI_RD_BASE_HI 0x374 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT 0x378 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT_HI 0x37c -#define PARF_ECAM_BASE 0x380 -#define PARF_ECAM_BASE_HI 0x384 #define PARF_NO_SNOOP_OVERRIDE 0x3d4 #define PARF_ATU_BASE_ADDR 0x634 #define PARF_ATU_BASE_ADDR_HI 0x638 @@ -98,7 +87,6 @@ /* PARF_SYS_CTRL register fields */ #define MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN BIT(29) -#define PCIE_ECAM_BLOCKER_EN BIT(26) #define MST_WAKEUP_EN BIT(13) #define SLV_WAKEUP_EN BIT(12) #define MSTR_ACLK_CGC_DIS BIT(10) @@ -146,9 +134,6 @@ /* PARF_LTSSM register fields */ #define LTSSM_EN BIT(8) -/* PARF_SLV_DBI_ELBI */ -#define SLV_DBI_ELBI_ADDR_BASE GENMASK(11, 0) - /* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */ #define PARF_INT_ALL_LINK_UP BIT(13) #define PARF_INT_MSI_DEV_0_7 GENMASK(30, 23) @@ -326,47 +311,6 @@ static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) qcom_perst_assert(pcie, false); } -static void qcom_pci_config_ecam(struct dw_pcie_rp *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct qcom_pcie *pcie = to_qcom_pcie(pci); - u64 addr, addr_end; - u32 val; - - writel_relaxed(lower_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE); - writel_relaxed(upper_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE_HI); - - /* - * The only device on the root bus is a single Root Port. If we try to - * access any devices other than Device/Function 00.0 on Bus 0, the TLP - * will go outside of the controller to the PCI bus. But with CFG Shift - * Feature (ECAM) enabled in iATU, there is no guarantee that the - * response is going to be all F's. Hence, to make sure that the - * requester gets all F's response for accesses other than the Root - * Port, configure iATU to block the transactions starting from - * function 1 of the root bus to the end of the root bus (i.e., from - * dbi_base + 4KB to dbi_base + 1MB). - */ - addr = pci->dbi_phys_addr + SZ_4K; - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE_HI); - - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE_HI); - - addr_end = pci->dbi_phys_addr + SZ_1M - 1; - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT_HI); - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT_HI); - - val = readl_relaxed(pcie->parf + PARF_SYS_CTRL); - val |= PCIE_ECAM_BLOCKER_EN; - writel_relaxed(val, pcie->parf + PARF_SYS_CTRL); -} - static int qcom_pcie_start_link(struct dw_pcie *pci) { struct qcom_pcie *pcie = to_qcom_pcie(pci); @@ -1320,7 +1264,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct qcom_pcie *pcie = to_qcom_pcie(pci); - u16 offset; int ret; qcom_ep_reset_assert(pcie); @@ -1329,17 +1272,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; - if (pp->ecam_enabled) { - /* - * Override ELBI when ECAM is enabled, as when ECAM is enabled, - * ELBI moves under the 'config' space. - */ - offset = FIELD_GET(SLV_DBI_ELBI_ADDR_BASE, readl(pcie->parf + PARF_SLV_DBI_ELBI)); - pci->elbi_base = pci->dbi_base + offset; - - qcom_pci_config_ecam(pp); - } - ret = qcom_pcie_phy_power_on(pcie); if (ret) goto err_deinit; From a0b7780602b1b196f47e527fec82166a7e67c4d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 Oct 2025 14:48:00 -0700 Subject: [PATCH 0184/1075] scsi: core: Fix a regression triggered by scsi_host_busy() Commit 995412e23bb2 ("blk-mq: Replace tags->lock with SRCU for tag iterators") introduced the following regression: Call trace: __srcu_read_lock+0x30/0x80 (P) blk_mq_tagset_busy_iter+0x44/0x300 scsi_host_busy+0x38/0x70 ufshcd_print_host_state+0x34/0x1bc ufshcd_link_startup.constprop.0+0xe4/0x2e0 ufshcd_init+0x944/0xf80 ufshcd_pltfrm_init+0x504/0x820 ufs_rockchip_probe+0x2c/0x88 platform_probe+0x5c/0xa4 really_probe+0xc0/0x38c __driver_probe_device+0x7c/0x150 driver_probe_device+0x40/0x120 __driver_attach+0xc8/0x1e0 bus_for_each_dev+0x7c/0xdc driver_attach+0x24/0x30 bus_add_driver+0x110/0x230 driver_register+0x68/0x130 __platform_driver_register+0x20/0x2c ufs_rockchip_pltform_init+0x1c/0x28 do_one_initcall+0x60/0x1e0 kernel_init_freeable+0x248/0x2c4 kernel_init+0x20/0x140 ret_from_fork+0x10/0x20 Fix this regression by making scsi_host_busy() check whether the SCSI host tag set has already been initialized. tag_set->ops is set by scsi_mq_setup_tags() just before blk_mq_alloc_tag_set() is called. This fix is based on the assumption that scsi_host_busy() and scsi_mq_setup_tags() calls are serialized. This is the case in the UFS driver. Reported-by: Sebastian Reichel Closes: https://lore.kernel.org/linux-block/pnezafputodmqlpumwfbn644ohjybouveehcjhz2hmhtcf2rka@sdhoiivync4y/ Cc: Ming Lei Cc: Jens Axboe Signed-off-by: Bart Van Assche Reviewed-by: Ming Lei Tested-by: Sebastian Reichel Link: https://patch.msgid.link/20251007214800.1678255-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/hosts.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index cc5d05dc395c..17173239301e 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -611,8 +611,9 @@ int scsi_host_busy(struct Scsi_Host *shost) { int cnt = 0; - blk_mq_tagset_busy_iter(&shost->tag_set, - scsi_host_check_in_flight, &cnt); + if (shost->tag_set.ops) + blk_mq_tagset_busy_iter(&shost->tag_set, + scsi_host_check_in_flight, &cnt); return cnt; } EXPORT_SYMBOL(scsi_host_busy); From 19de7113bfac33ba92c004a9b510612bb745cfa0 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Oct 2025 08:34:19 -0500 Subject: [PATCH 0185/1075] x86,fs/resctrl: Fix NULL pointer dereference with events force-disabled in mbm_event mode The following NULL pointer dereference is encountered on mount of resctrl fs after booting a system that supports assignable counters with the "rdt=!mbmtotal,!mbmlocal" kernel parameters: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:mbm_cntr_get Call Trace: rdtgroup_assign_cntr_event rdtgroup_assign_cntrs rdt_get_tree Specifying the kernel parameter "rdt=!mbmtotal,!mbmlocal" effectively disables the legacy X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features and the MBM events they represent. This results in the per-domain MBM event related data structures to not be allocated during early initialization. resctrl fs initialization follows by implicitly enabling both MBM total and local events on a system that supports assignable counters (mbm_event mode), but this enabling occurs after the per-domain data structures have been created. After booting, resctrl fs assumes that an enabled event can access all its state. This results in NULL pointer dereference when resctrl attempts to access the un-allocated structures of an enabled event. Remove the late MBM event enabling from resctrl fs. This leaves a problem where the X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features may be disabled while assignable counter (mbm_event) mode is enabled without any events to support. Switching between the "default" and "mbm_event" mode without any events is not practical. Create a dependency between the X86_FEATURE_{CQM_MBM_TOTAL,CQM_MBM_LOCAL} and X86_FEATURE_ABMC (assignable counter) hardware features. An x86 system that supports assignable counters now requires support of X86_FEATURE_CQM_MBM_TOTAL or X86_FEATURE_CQM_MBM_LOCAL. This ensures all needed MBM related data structures are created before use and that it is only possible to switch between "default" and "mbm_event" mode when the same events are available in both modes. This dependency does not exist in the hardware but this usage of these feature settings work for known systems. [ bp: Massage commit message. ] Fixes: 13390861b426e ("x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details") Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/a62e6ac063d0693475615edd213d5be5e55443e6.1760560934.git.babu.moger@amd.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 11 ++++++++++- fs/resctrl/monitor.c | 16 +++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2cd25a0d4637..fe1a2aa53c16 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -458,7 +458,16 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - if (rdt_cpu_has(X86_FEATURE_ABMC)) { + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 4076336fbba6..572a9925bd6c 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1782,15 +1782,13 @@ int resctrl_mon_resource_init(void) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; if (r->mon.mbm_cntr_assignable) { - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); - mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; - mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & - (READS_TO_LOCAL_MEM | - READS_TO_LOCAL_S_MEM | - NON_TEMP_WRITE_TO_LOCAL_MEM); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); From 9d7dfb95da2cb5c1287df2f3468bcb70d8b31087 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 16 Oct 2025 11:21:47 -0700 Subject: [PATCH 0186/1075] KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD guest attempts to execute SEAMCALL or TDCALL. Neither SEAMCALL nor TDCALL is gated by any software enablement other than VMXON, and so will generate a VM-Exit instead of e.g. a native #UD when executed from the guest kernel. Note! No unprivileged DoS of the L1 kernel is possible as TDCALL and SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can crash itself, but not L1. Note #2! The Intel® Trust Domain CPU Architectural Extensions spec's pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit, but that appears to be a documentation bug (likely because the CPL > 0 check was incorrectly bundled with other lower-priority #GP checks). Testing on SPR and EMR shows that the CPL > 0 check is performed before the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode. Note #3! The aforementioned Trust Domain spec uses confusing pseudocode that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM" specifically means in SEAM Root Mode, i.e. in the TDX-Module. The long- form description explicitly states that SEAMCALL generates an exit when executed in "SEAM VMX non-root operation". But that's a moot point as the TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as documented in the "Unconditionally Blocked Instructions" section of the TDX-Module base specification. Cc: stable@vger.kernel.org Cc: Kai Huang Cc: Xiaoyao Li Cc: Rick Edgecombe Cc: Dan Williams Cc: Binbin Wu Reviewed-by: Kai Huang Reviewed-by: Binbin Wu Reviewed-by: Xiaoyao Li Link: https://lore.kernel.org/r/20251016182148.69085-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/include/uapi/asm/vmx.h | 1 + arch/x86/kvm/vmx/nested.c | 8 ++++++++ arch/x86/kvm/vmx/vmx.c | 8 ++++++++ 3 files changed, 17 insertions(+) diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index 9792e329343e..1baa86dfe029 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -93,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 #define EXIT_REASON_MSR_READ_IMM 84 #define EXIT_REASON_MSR_WRITE_IMM 85 diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 76271962cb70..bcea087b642f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, case EXIT_REASON_NOTIFY: /* Notify VM exit is not exposed to L1 */ return false; + case EXIT_REASON_SEAMCALL: + case EXIT_REASON_TDCALL: + /* + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't + * virtualized by KVM for L1 hypervisors, i.e. L1 should + * never want or expect such an exit. + */ + return false; default: return true; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f87c216d976d..91b6f2f3edc2 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6032,6 +6032,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu) return 1; } +static int handle_tdx_instruction(struct kvm_vcpu *vcpu) +{ + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; +} + #ifndef CONFIG_X86_SGX_KVM static int handle_encls(struct kvm_vcpu *vcpu) { @@ -6157,6 +6163,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_ENCLS] = handle_encls, [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, [EXIT_REASON_NOTIFY] = handle_notify, + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction, + [EXIT_REASON_TDCALL] = handle_tdx_instruction, [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, }; From 789e46fbfca1875671717a20a916ca1a920268e4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Oct 2025 12:51:35 +0300 Subject: [PATCH 0187/1075] drm/i915/panic: fix panic structure allocation memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separating the panic allocation from framebuffer allocation in commit 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") failed to deallocate the panic structure anywhere. The fix is two-fold. First, free the panic structure in intel_user_framebuffer_destroy() in the general case. Second, move the panic allocation later to intel_framebuffer_init() to not leak the panic structure in error paths (if any, now or later) between intel_framebuffer_alloc() and intel_framebuffer_init(). v2: Rebase Fixes: 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") Cc: Jocelyn Falempe Cc: Maarten Lankhorst Reported-by: Michał Grzelak Suggested-by: Ville Syrjälä Tested-by: Michał Grzelak # v1 Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251015095135.2183415-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 8f8ef09fcf6a3b00369bfc704e8f68d7474eca94) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fb.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index b817ff44c043..c48384e58ea1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2117,6 +2117,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb->panic); kfree(intel_fb); } @@ -2215,16 +2216,22 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct intel_display *display = to_intel_display(obj->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - int ret = -EINVAL; + int ret; int i; + intel_fb->panic = intel_panic_alloc(); + if (!intel_fb->panic) + return -ENOMEM; + /* * intel_frontbuffer_get() must be done before * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. */ intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) - return -ENOMEM; + if (!intel_fb->frontbuffer) { + ret = -ENOMEM; + goto err_free_panic; + } ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); if (ret) @@ -2323,6 +2330,9 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); +err_free_panic: + kfree(intel_fb->panic); + return ret; } @@ -2349,20 +2359,11 @@ intel_user_framebuffer_create(struct drm_device *dev, struct intel_framebuffer *intel_framebuffer_alloc(void) { struct intel_framebuffer *intel_fb; - struct intel_panic *panic; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) return NULL; - panic = intel_panic_alloc(); - if (!panic) { - kfree(intel_fb); - return NULL; - } - - intel_fb->panic = panic; - return intel_fb; } From 39a9ed0fb6dac58547afdf9b6cb032d326a3698f Mon Sep 17 00:00:00 2001 From: Haofeng Li Date: Wed, 15 Oct 2025 14:17:53 +0800 Subject: [PATCH 0188/1075] timekeeping: Fix aux clocks sysfs initialization loop bound The loop in tk_aux_sysfs_init() uses `i <= MAX_AUX_CLOCKS` as the termination condition, which results in 9 iterations (i=0 to 8) when MAX_AUX_CLOCKS is defined as 8. However, the kernel is designed to support only up to 8 auxiliary clocks. This off-by-one error causes the creation of a 9th sysfs entry that exceeds the intended auxiliary clock range. Fix the loop bound to use `i < MAX_AUX_CLOCKS` to ensure exactly 8 auxiliary clock entries are created, matching the design specification. Fixes: 7b95663a3d96 ("timekeeping: Provide interface to control auxiliary clocks") Signed-off-by: Haofeng Li Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/tencent_2376993D9FC06A3616A4F981B3DE1C599607@qq.com --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b6974fce800c..3a4d3b2e3f74 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void) return -ENOMEM; } - for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + for (int i = 0; i < MAX_AUX_CLOCKS; i++) { char id[2] = { [0] = '0' + i, }; struct kobject *clk = kobject_create_and_add(id, auxo); From 2551a1eedc09f5a86f94b038dc1bb16855c256f1 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Fri, 17 Oct 2025 11:28:14 +0200 Subject: [PATCH 0189/1075] kunit: test_dev_action: Correctly cast 'priv' pointer to long* The previous implementation incorrectly assumed the original type of 'priv' was void**, leading to an unnecessary and misleading cast. Correct the cast of the 'priv' pointer in test_dev_action() to its actual type, long*, removing an unnecessary cast. As an additional benefit, this fixes an out-of-bounds CHERI fault on hardware with architectural capabilities. The original implementation tried to store a capability-sized pointer using the priv pointer. However, the priv pointer's capability only granted access to the memory region of its original long type, leading to a bounds violation since the size of a long is smaller than the size of a capability. This change ensures that the pointer usage respects the capabilities' bounds. Link: https://lore.kernel.org/r/20251017092814.80022-1-florian.schmaus@codasip.com Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Reviewed-by: David Gow Signed-off-by: Florian Schmaus Signed-off-by: Shuah Khan --- lib/kunit/kunit-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 8c01eabd4eaf..63130a48e237 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -739,7 +739,7 @@ static struct kunit_case kunit_current_test_cases[] = { static void test_dev_action(void *priv) { - *(void **)priv = (void *)1; + *(long *)priv = 1; } static void kunit_device_test(struct kunit *test) From 10fad4012234a7dea621ae17c0c9486824f645a0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 18 Oct 2025 14:27:15 +0200 Subject: [PATCH 0190/1075] Revert "cpuidle: menu: Avoid discarding useful information" It is reported that commit 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") led to a performance regression on Intel Jasper Lake systems because it reduced the time spent by CPUs in idle state C7 which is correlated to the maximum frequency the CPUs can get to because of an average running power limit [1]. Before that commit, get_typical_interval() would have returned UINT_MAX whenever it had been unable to make a high-confidence prediction which had led to selecting the deepest available idle state too often and both power and performance had been inadequate as a result of that on some systems. However, this had not been a problem on systems with relatively aggressive average running power limits, like the Jasper Lake systems in question, because on those systems it was compensated by the ability to run CPUs faster. It was addressed by causing get_typical_interval() to return a number based on the recent idle duration information available to it even if it could not make a high-confidence prediction, but that clearly did not take the possible correlation between idle power and available CPU capacity into account. For this reason, revert most of the changes made by commit 85975daeaa4d, except for one cosmetic cleanup, and add a comment explaining the rationale for returning UINT_MAX from get_typical_interval() when it is unable to make a high-confidence prediction. Fixes: 85975daeaa4d ("cpuidle: menu: Avoid discarding useful information") Closes: https://lore.kernel.org/linux-pm/36iykr223vmcfsoysexug6s274nq2oimcu55ybn6ww4il3g3cv@cohflgdbpnq7/ [1] Reported-by: Sergey Senozhatsky Cc: All applicable Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3663603.iIbC2pHGDl@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 4d9aa5ce31f0..7d21fb5a72f4 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -188,20 +188,17 @@ static unsigned int get_typical_interval(struct menu_device *data) * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. + * + * However, if the number of remaining samples is too small to exclude + * any more outliers, allow the deepest available idle state to be + * selected because there are systems where the time spent by CPUs in + * deep idle states is correlated to the maximum frequency the CPUs + * can get to. On those systems, shallow idle states should be avoided + * unless there is a clear indication that the given CPU is most likley + * going to be woken up shortly. */ - if (divisor * 4 <= INTERVALS * 3) { - /* - * If there are sufficiently many data points still under - * consideration after the outliers have been eliminated, - * returning without a prediction would be a mistake because it - * is likely that the next interval will not exceed the current - * maximum, so return the latter in that case. - */ - if (divisor >= INTERVALS / 2) - return max; - + if (divisor * 4 <= INTERVALS * 3) return UINT_MAX; - } /* Update the thresholds for the next round. */ if (avg - min > max - avg) From f3f313c51148668d3c44f1119762325ce2c0715b Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Fri, 17 Oct 2025 15:37:44 +0530 Subject: [PATCH 0191/1075] ACPI: property: Fix argument order in __acpi_node_get_property_reference() A refactoring bug introduced an argument order mistake in the call to acpi_fwnode_get_reference_args() from __acpi_node_get_property_reference(). This caused incorrect behavior when resolving ACPI property references. Fix the issue by correcting the argument order. Fixes: e121be784d35 ("ACPI: property: Refactor acpi_fwnode_get_reference_args() to support nargs_prop") Reported-by: Thomas Richard Closes: https://lore.kernel.org/all/1241f2b6-9b4e-4623-8a83-77db8774ac32@bootlin.com/ Tested-by: Thomas Richard Signed-off-by: Sunil V L Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20251017100744.71871-1-sunilvl@ventanamicro.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/property.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 1b997a5497e7..43d5e457814e 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1107,7 +1107,7 @@ int __acpi_node_get_property_reference(const struct fwnode_handle *fwnode, size_t num_args, struct fwnode_reference_args *args) { - return acpi_fwnode_get_reference_args(fwnode, propname, NULL, index, num_args, args); + return acpi_fwnode_get_reference_args(fwnode, propname, NULL, num_args, index, args); } EXPORT_SYMBOL_GPL(__acpi_node_get_property_reference); From b2c37c1168f537900158c860174001d055d8d583 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 13 Oct 2025 09:26:11 -0600 Subject: [PATCH 0192/1075] MAINTAINERS: Update Alex Williamson's email address Switch to a personal email account as I'll be leaving Red Hat soon. Signed-off-by: Alex Williamson Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20251013152613.3088777-1-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- .mailmap | 1 + MAINTAINERS | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index c371ba3fb845..717d754b378c 100644 --- a/.mailmap +++ b/.mailmap @@ -27,6 +27,7 @@ Alan Cox Alan Cox Aleksandar Markovic Aleksey Gorelov +Alex Williamson Alexander Lobakin Alexander Lobakin Alexander Lobakin diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..74389eddb954 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -26885,7 +26885,7 @@ S: Maintained F: drivers/vfio/cdx/* VFIO DRIVER -M: Alex Williamson +M: Alex Williamson L: kvm@vger.kernel.org S: Maintained T: git https://github.com/awilliam/linux-vfio.git @@ -27048,7 +27048,7 @@ T: git git://linuxtv.org/media.git F: drivers/media/test-drivers/vimc/* VIRT LIB -M: Alex Williamson +M: Alex Williamson M: Paolo Bonzini L: kvm@vger.kernel.org S: Supported From 5da6fb6356362c7eb40ed931b27abc31b3582950 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 10:33:06 +0100 Subject: [PATCH 0193/1075] cifs: Add a couple of missing smb3_rw_credits tracepoints Add missing smb3_rw_credits tracepoints to cifs_readv_callback() (for SMB1) to match those of SMB2/3. Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifssmb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 2881efcbe09a..7da194f29fef 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1311,6 +1311,8 @@ cifs_readv_callback(struct mid_q_entry *mid) .rreq_debug_id = rdata->rreq->debug_id, .rreq_debug_index = rdata->subreq.debug_index, }; + unsigned int rreq_debug_id = rdata->rreq->debug_id; + unsigned int subreq_debug_index = rdata->subreq.debug_index; cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", __func__, mid->mid, mid->mid_state, rdata->result, @@ -1374,6 +1376,9 @@ cifs_readv_callback(struct mid_q_entry *mid) __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); } + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, + server->credits, server->in_flight, + 0, cifs_trace_rw_credits_read_response_clear); rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; @@ -1381,6 +1386,9 @@ cifs_readv_callback(struct mid_q_entry *mid) netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, + server->credits, server->in_flight, + credits.value, cifs_trace_rw_credits_read_response_add); } /* cifs_async_readv - send an async write, and set up mid to handle result */ From cb30dfa75d55eced379a42fd67bd5fb7ec38555e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Oct 2025 15:17:18 -0300 Subject: [PATCH 0194/1075] iommufd: Don't overflow during division for dirty tracking If pgshift is 63 then BITS_PER_TYPE(*bitmap->bitmap) * pgsize will overflow to 0 and this triggers divide by 0. In this case the index should just be 0, so reorganize things to divide by shift and avoid hitting any overflows. Link: https://patch.msgid.link/r/0-v1-663679b57226+172-iommufd_dirty_div0_jgg@nvidia.com Cc: stable@vger.kernel.org Fixes: 58ccf0190d19 ("vfio: Add an IOVA bitmap support") Reviewed-by: Joao Martins Reviewed-by: Nicolin Chen Reviewed-by: Kevin Tian Reported-by: syzbot+093a8a8b859472e6c257@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=093a8a8b859472e6c257 Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index 4514575818fc..b5b67a9d3fb3 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -130,9 +130,8 @@ struct iova_bitmap { static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, unsigned long iova) { - unsigned long pgsize = 1UL << bitmap->mapped.pgshift; - - return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); + return (iova >> bitmap->mapped.pgshift) / + BITS_PER_TYPE(*bitmap->bitmap); } /* From b09ed52db1e688eb8205b1939ca1345179ecd515 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 14 Oct 2025 14:48:46 -0700 Subject: [PATCH 0195/1075] iommufd/selftest: Fix ioctl return value in _test_cmd_trigger_vevents() The ioctl returns 0 upon success, so !0 returning -1 breaks the selftest. Drop the '!' to fix it. Fixes: 1d235d849425 ("iommu/selftest: prevent use of uninitialized variable") Link: https://patch.msgid.link/r/20251014214847.1113759-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 772ca1db6e59..9f472c20c190 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1044,8 +1044,8 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) }; while (nvevents--) { - if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), - &trigger_vevent_cmd)) + if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd)) return -1; } return 0; From a73ca0449bcb7c238097cc6a1bf3fd82a78374df Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 17 Oct 2025 16:06:14 -0400 Subject: [PATCH 0196/1075] selftests: net: fix server bind failure in sctp_vrf.sh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sctp_vrf.sh could fail: TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N [FAIL] not ok 1 selftests: net: sctp_vrf.sh # exit=3 The failure happens when the server bind in a new run conflicts with an existing association from the previous run: [1] ip netns exec $SERVER_NS ./sctp_hello server ... [2] ip netns exec $CLIENT_NS ./sctp_hello client ... [3] ip netns exec $SERVER_NS pkill sctp_hello ... [4] ip netns exec $SERVER_NS ./sctp_hello server ... It occurs if the client in [2] sends a message and closes immediately. With the message unacked, no SHUTDOWN is sent. Killing the server in [3] triggers a SHUTDOWN the client also ignores due to the unacked message, leaving the old association alive. This causes the bind at [4] to fail until the message is acked and the client responds to a second SHUTDOWN after the server’s T2 timer expires (3s). This patch fixes the issue by preventing the client from sending data. Instead, the client blocks on recv() and waits for the server to close. It also waits until both the server and the client sockets are fully released in stop_server and wait_client before restarting. Additionally, replace 2>&1 >/dev/null with -q in sysctl and grep, and drop other redundant 2>&1 >/dev/null redirections, and fix a typo from N to Y (connect successfully) in the description of the last test. Fixes: a61bd7b9fef3 ("selftests: add a selftest for sctp vrf") Reported-by: Hangbin Liu Tested-by: Jakub Kicinski Signed-off-by: Xin Long Link: https://patch.msgid.link/be2dacf52d0917c4ba5e2e8c5a9cb640740ad2b6.1760731574.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/sctp_hello.c | 17 +----- tools/testing/selftests/net/sctp_vrf.sh | 73 +++++++++++++++--------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c index f02f1f95d227..a04dac0b8027 100644 --- a/tools/testing/selftests/net/sctp_hello.c +++ b/tools/testing/selftests/net/sctp_hello.c @@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len static int do_client(int argc, char *argv[]) { struct sockaddr_storage ss; - char buf[] = "hello"; int csk, ret, len; if (argc < 5) { @@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[]) set_addr(&ss, argv[3], argv[4], &len); ret = connect(csk, (struct sockaddr *)&ss, len); - if (ret < 0) { - printf("failed to connect to peer\n"); + if (ret < 0) return -1; - } - ret = send(csk, buf, strlen(buf) + 1, 0); - if (ret < 0) { - printf("failed to send msg %d\n", ret); - return -1; - } + recv(csk, NULL, 0, 0); close(csk); return 0; @@ -75,7 +68,6 @@ int main(int argc, char *argv[]) { struct sockaddr_storage ss; int lsk, csk, ret, len; - char buf[20]; if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s server|client ...\n", argv[0]); @@ -125,11 +117,6 @@ int main(int argc, char *argv[]) return -1; } - ret = recv(csk, buf, sizeof(buf), 0); - if (ret <= 0) { - printf("failed to recv msg %d\n", ret); - return -1; - } close(csk); close(lsk); diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c854034b6aa1..667b211aa8a1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -20,9 +20,9 @@ setup() { modprobe sctp_diag setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS - ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2 @@ -62,17 +62,40 @@ setup() { } cleanup() { - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } -wait_server() { +start_server() { local IFACE=$1 local CNT=0 - until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ - grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do - [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE & + disown + until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do + [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +stop_server() { + local CNT=0 + + ip netns exec $SERVER_NS pkill sctp_hello + while ip netns exec $SERVER_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break + sleep 0.1 + done +} + +wait_client() { + local CLIENT_NS=$1 + local CNT=0 + + while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break sleep 0.1 done } @@ -81,14 +104,12 @@ do_test() { local CLIENT_NS=$1 local IFACE=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE 2>&1 >/dev/null & - disown - wait_server $IFACE || return $RET + start_server $IFACE || return $RET timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS + stop_server return $RET } @@ -96,25 +117,21 @@ do_testx() { local IFACE1=$1 local IFACE2=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE1 2>&1 >/dev/null & - disown - wait_server $IFACE1 || return $RET - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE2 2>&1 >/dev/null & - disown - wait_server $IFACE2 || return $RET + start_server $IFACE1 || return $RET + start_server $IFACE2 || return $RET timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \ timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server return $RET } testup() { - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1 echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -123,7 +140,7 @@ testup() { do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } echo "[PASS]" - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0 echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -160,7 +177,7 @@ testup() { do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } echo "[PASS]" - echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y " do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" } From 9a3c0d6834194b6e3cce4ffbb55f800c6cb58c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 15 Oct 2025 19:07:25 +0200 Subject: [PATCH 0197/1075] drm/xe: Retain vma flags when recreating and splitting vmas for madvise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When splitting and restoring vmas for madvise, we only copied the XE_VMA_SYSTEM_ALLOCATOR flag. That meant we lost flags for read_only, dumpable and sparse (in case anyone would call madvise for the latter). Instead, define a mask of relevant flags and ensure all are replicated, To simplify this and make the code a bit less fragile, remove the conversion to VMA_CREATE flags and instead just pass around the gpuva flags after initial conversion from user-space. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251015170726.178685-1-thomas.hellstrom@linux.intel.com (cherry picked from commit b3af8658ec70f2196190c66103478352286aba3b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pt.c | 4 +- drivers/gpu/drm/xe/xe_vm.c | 86 +++++++++++--------------------- drivers/gpu/drm/xe/xe_vm_types.h | 9 +--- 3 files changed, 32 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a1c88f9a6c76..07f96bda638a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2022,7 +2022,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2252,7 +2252,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f602b874e054..f7a0931eb66c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -616,6 +616,12 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -628,8 +634,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -932,11 +937,6 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, @@ -947,11 +947,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -972,10 +969,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -986,10 +979,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -2272,12 +2262,14 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -2590,14 +2582,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, .pat_index = op->map.pat_index, }; - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; vma = new_vma(vm, &op->base.map, &default_attr, flags); @@ -2606,7 +2591,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); @@ -2637,18 +2622,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, &old->attr, flags); @@ -4212,7 +4186,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, struct xe_vma_ops vops; struct drm_gpuva_ops *ops = NULL; struct drm_gpuva_op *__op; - bool is_cpu_addr_mirror = false; + unsigned int vma_flags = 0; bool remap_op = false; struct xe_vma_mem_attr tmp_attr; u16 default_pat; @@ -4242,15 +4216,17 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, vma = gpuva_to_vma(op->base.unmap.va); XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_REMAP) { vma = gpuva_to_vma(op->base.remap.unmap->va); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.is_cpu_addr_mirror = true; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; op->map.pat_index = default_pat; } } else { @@ -4259,11 +4235,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, xe_assert(vm->xe, !remap_op); xe_assert(vm->xe, xe_vma_has_no_bo(vma)); remap_op = true; - - if (xe_vma_is_cpu_addr_mirror(vma)) - is_cpu_addr_mirror = true; - else - is_cpu_addr_mirror = false; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { @@ -4272,10 +4244,10 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, /* * In case of madvise ops DRM_GPUVA_OP_MAP is * always after DRM_GPUVA_OP_REMAP, so ensure - * we assign op->map.is_cpu_addr_mirror true - * if REMAP is for xe_vma_is_cpu_addr_mirror vma + * to propagate the flags from the vma we're + * unmapping. */ - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; } } print_op(vm->xe, __op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 413353e1c225..a3b422b27ae8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -345,17 +345,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; From ce831bffcef3d8f9691b5537d74ffa1b1256c017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 15 Oct 2025 19:07:26 +0200 Subject: [PATCH 0198/1075] drm/xe/uapi: Hide the madvise autoreset behind a VM_BIND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The madvise implementation currently resets the SVM madvise if the underlying CPU map is unmapped. This is in an attempt to mimic the CPU madvise behaviour. However, it's not clear that this is a desired behaviour since if the end app user relies on it for malloc()ed objects or stack objects, it may not work as intended. Instead of having the autoreset functionality being a direct application-facing implicit UAPI, make the UMD explicitly choose this behaviour if it wants to expose it by introducing DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET, and add a semantics description. v2: - Kerneldoc fixes. Fix a commit log message. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: "Falkowski, John" Cc: "Mrozek, Michal" Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015170726.178685-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 59a2d3f38ab23cce4cd9f0c4a5e08fdfe9e67ae7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 12 +++++++++--- drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index da2a412f80c0..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -302,6 +302,11 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 if (!vma) return -EINVAL; + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + if (xe_vma_has_default_mem_attrs(vma)) return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f7a0931eb66c..63c65e3d207b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -620,7 +620,8 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, XE_VMA_READ_ONLY | \ XE_VMA_DUMPABLE | \ XE_VMA_SYSTEM_ALLOCATOR | \ - DRM_GPUVA_SPARSE) + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) @@ -2270,6 +2271,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -3253,7 +3256,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3368,7 +3372,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a3b422b27ae8..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -46,6 +46,7 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) /** * struct xe_vma_mem_attr - memory attributes associated with vma diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..517489a7ec60 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1013,6 +1013,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. + * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1119,6 +1133,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; From afd5ba577c10639f62e8120df67dc70ea4b61176 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 16 Oct 2025 22:55:39 +0300 Subject: [PATCH 0199/1075] net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for legacy RQ XDP programs can release xdp_buff fragments when calling bpf_xdp_adjust_tail(). The driver currently assumes the number of fragments to be unchanged and may generate skb with wrong truesize or containing invalid frags. Fix the bug by generating skb according to xdp_buff after the XDP program runs. Fixes: ea5d49bdae8b ("net/mlx5e: Add XDP multi buffer support to the non-linear legacy RQ") Reviewed-by: Dragos Tatulea Signed-off-by: Amery Hung Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1760644540-899148-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 263d5628ee44..17cab14b328b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1794,14 +1794,27 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi } prog = rcu_dereference(rq->xdp_prog); - if (prog && mlx5e_xdp_handle(rq, prog, mxbuf)) { - if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { - struct mlx5e_wqe_frag_info *pwi; + if (prog) { + u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; - for (pwi = head_wi; pwi < wi; pwi++) - pwi->frag_page->frags++; + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { + if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, + rq->flags)) { + struct mlx5e_wqe_frag_info *pwi; + + wi -= old_nr_frags - sinfo->nr_frags; + + for (pwi = head_wi; pwi < wi; pwi++) + pwi->frag_page->frags++; + } + return NULL; /* page/packet was consumed by XDP */ + } + + nr_frags_free = old_nr_frags - sinfo->nr_frags; + if (unlikely(nr_frags_free)) { + wi -= nr_frags_free; + truesize -= nr_frags_free * frag_info->frag_stride; } - return NULL; /* page/packet was consumed by XDP */ } skb = mlx5e_build_linear_skb( From 87bcef158ac1faca1bd7e0104588e8e2956d10be Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Thu, 16 Oct 2025 22:55:40 +0300 Subject: [PATCH 0200/1075] net/mlx5e: RX, Fix generating skb from non-linear xdp_buff for striding RQ XDP programs can change the layout of an xdp_buff through bpf_xdp_adjust_tail() and bpf_xdp_adjust_head(). Therefore, the driver cannot assume the size of the linear data area nor fragments. Fix the bug in mlx5 by generating skb according to xdp_buff after XDP programs run. Currently, when handling multi-buf XDP, the mlx5 driver assumes the layout of an xdp_buff to be unchanged. That is, the linear data area continues to be empty and fragments remain the same. This may cause the driver to generate erroneous skb or triggering a kernel warning. When an XDP program added linear data through bpf_xdp_adjust_head(), the linear data will be ignored as mlx5e_build_linear_skb() builds an skb without linear data and then pull data from fragments to fill the linear data area. When an XDP program has shrunk the non-linear data through bpf_xdp_adjust_tail(), the delta passed to __pskb_pull_tail() may exceed the actual nonlinear data size and trigger the BUG_ON in it. To fix the issue, first record the original number of fragments. If the number of fragments changes after the XDP program runs, rewind the end fragment pointer by the difference and recalculate the truesize. Then, build the skb with the linear data area matching the xdp_buff. Finally, only pull data in if there is non-linear data and fill the linear part up to 256 bytes. Fixes: f52ac7028bec ("net/mlx5e: RX, Add XDP multi-buffer support in Striding RQ") Signed-off-by: Amery Hung Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1760644540-899148-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 17cab14b328b..1c79adc51a04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2040,6 +2040,7 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w u32 byte_cnt = cqe_bcnt; struct skb_shared_info *sinfo; unsigned int truesize = 0; + u32 pg_consumed_bytes; struct bpf_prog *prog; struct sk_buff *skb; u32 linear_frame_sz; @@ -2093,7 +2094,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w while (byte_cnt) { /* Non-linear mode, hence non-XSK, which always uses PAGE_SIZE. */ - u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) truesize += pg_consumed_bytes; @@ -2109,10 +2111,15 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w } if (prog) { + u8 nr_frags_free, old_nr_frags = sinfo->nr_frags; + u32 len; + if (mlx5e_xdp_handle(rq, prog, mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { struct mlx5e_frag_page *pfp; + frag_page -= old_nr_frags - sinfo->nr_frags; + for (pfp = head_page; pfp < frag_page; pfp++) pfp->frags++; @@ -2123,9 +2130,19 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w return NULL; /* page/packet was consumed by XDP */ } + nr_frags_free = old_nr_frags - sinfo->nr_frags; + if (unlikely(nr_frags_free)) { + frag_page -= nr_frags_free; + truesize -= (nr_frags_free - 1) * PAGE_SIZE + + ALIGN(pg_consumed_bytes, + BIT(rq->mpwqe.log_stride_sz)); + } + + len = mxbuf->xdp.data_end - mxbuf->xdp.data; + skb = mlx5e_build_linear_skb( rq, mxbuf->xdp.data_hard_start, linear_frame_sz, - mxbuf->xdp.data - mxbuf->xdp.data_hard_start, 0, + mxbuf->xdp.data - mxbuf->xdp.data_hard_start, len, mxbuf->xdp.data - mxbuf->xdp.data_meta); if (unlikely(!skb)) { mlx5e_page_release_fragmented(rq->page_pool, @@ -2150,8 +2167,11 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w do pagep->frags++; while (++pagep < frag_page); + + headlen = min_t(u16, MLX5E_RX_MAX_HEAD - len, + skb->data_len); + __pskb_pull_tail(skb, headlen); } - __pskb_pull_tail(skb, headlen); } else { dma_addr_t addr; From f584239a9ed25057496bf397c370cc5163dde419 Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Fri, 17 Oct 2025 10:48:27 +0800 Subject: [PATCH 0201/1075] net/smc: fix general protection fault in __smc_diag_dump The syzbot report a crash: Oops: general protection fault, probably for non-canonical address 0xfbd5a5d5a0000003: 0000 [#1] SMP KASAN NOPTI KASAN: maybe wild-memory-access in range [0xdead4ead00000018-0xdead4ead0000001f] CPU: 1 UID: 0 PID: 6949 Comm: syz.0.335 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 RIP: 0010:smc_diag_msg_common_fill net/smc/smc_diag.c:44 [inline] RIP: 0010:__smc_diag_dump.constprop.0+0x3ca/0x2550 net/smc/smc_diag.c:89 Call Trace: smc_diag_dump_proto+0x26d/0x420 net/smc/smc_diag.c:217 smc_diag_dump+0x27/0x90 net/smc/smc_diag.c:234 netlink_dump+0x539/0xd30 net/netlink/af_netlink.c:2327 __netlink_dump_start+0x6d6/0x990 net/netlink/af_netlink.c:2442 netlink_dump_start include/linux/netlink.h:341 [inline] smc_diag_handler_dump+0x1f9/0x240 net/smc/smc_diag.c:251 __sock_diag_cmd net/core/sock_diag.c:249 [inline] sock_diag_rcv_msg+0x438/0x790 net/core/sock_diag.c:285 netlink_rcv_skb+0x158/0x420 net/netlink/af_netlink.c:2552 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x5a7/0x870 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x8d1/0xdd0 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg net/socket.c:729 [inline] ____sys_sendmsg+0xa95/0xc70 net/socket.c:2614 ___sys_sendmsg+0x134/0x1d0 net/socket.c:2668 __sys_sendmsg+0x16d/0x220 net/socket.c:2700 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xcd/0x4e0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f The process like this: (CPU1) | (CPU2) ---------------------------------|------------------------------- inet_create() | // init clcsock to NULL | sk = sk_alloc() | | // unexpectedly change clcsock | inet_init_csk_locks() | | // add sk to hash table | smc_inet_init_sock() | smc_sk_init() | smc_hash_sk() | | // traverse the hash table | smc_diag_dump_proto | __smc_diag_dump() | // visit wrong clcsock | smc_diag_msg_common_fill() // alloc clcsock | smc_create_clcsk | sock_create_kern | With CONFIG_DEBUG_LOCK_ALLOC=y, the smc->clcsock is unexpectedly changed in inet_init_csk_locks(). The INET_PROTOSW_ICSK flag is no need by smc, just remove it. After removing the INET_PROTOSW_ICSK flag, this patch alse revert commit 6fd27ea183c2 ("net/smc: fix lacks of icsk_syn_mss with IPPROTO_SMC") to avoid casting smc_sock to inet_connection_sock. Reported-by: syzbot+f775be4458668f7d220e@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f775be4458668f7d220e Tested-by: syzbot+f775be4458668f7d220e@syzkaller.appspotmail.com Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Wang Liang Reviewed-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Reviewed-by: D. Wythe Link: https://patch.msgid.link/20251017024827.3137512-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- net/smc/smc_inet.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c index a944e7dcb8b9..a94084b4a498 100644 --- a/net/smc/smc_inet.c +++ b/net/smc/smc_inet.c @@ -56,7 +56,6 @@ static struct inet_protosw smc_inet_protosw = { .protocol = IPPROTO_SMC, .prot = &smc_inet_prot, .ops = &smc_inet_stream_ops, - .flags = INET_PROTOSW_ICSK, }; #if IS_ENABLED(CONFIG_IPV6) @@ -104,27 +103,15 @@ static struct inet_protosw smc_inet6_protosw = { .protocol = IPPROTO_SMC, .prot = &smc_inet6_prot, .ops = &smc_inet6_stream_ops, - .flags = INET_PROTOSW_ICSK, }; #endif /* CONFIG_IPV6 */ -static unsigned int smc_sync_mss(struct sock *sk, u32 pmtu) -{ - /* No need pass it through to clcsock, mss can always be set by - * sock_create_kern or smc_setsockopt. - */ - return 0; -} - static int smc_inet_init_sock(struct sock *sk) { struct net *net = sock_net(sk); /* init common smc sock */ smc_sk_init(net, sk, IPPROTO_SMC); - - inet_csk(sk)->icsk_sync_mss = smc_sync_mss; - /* create clcsock */ return smc_create_clcsk(net, sk, sk->sk_family); } From 914f377075d646b4695a7868ba090f4c714dfd4b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 13 Oct 2025 12:08:29 +0900 Subject: [PATCH 0202/1075] xfs: Improve CONFIG_XFS_RT Kconfig help Improve the description of the XFS_RT configuration option to document that this option is required for zoned block devices. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 8930d5254e1d..d66d517c99a9 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -119,6 +119,15 @@ config XFS_RT See the xfs man page in section 5 for additional information. + This option is mandatory to support zoned block devices. For these + devices, the realtime subvolume must be backed by a zoned block + device and a regular block device used as the main device (for + metadata). If the zoned block device is a host-managed SMR hard-disk + containing conventional zones at the beginning of its address space, + XFS will use the disk conventional zones as the main device and the + remaining sequential write required zones as the backing storage for + the realtime subvolume. + If unsure, say N. config XFS_DRAIN_INTENTS From b00bcb190eef35ae4da3c424b8a72f287e69f650 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 14 Oct 2025 13:19:45 +0900 Subject: [PATCH 0203/1075] xfs: do not tightly pack-write large files When using a zoned realtime device, tightly packing of data blocks belonging to multiple closed files into the same realtime group (RTG) is very efficient at improving write performance. This is especially true with SMR HDDs as this can reduce, and even suppress, disk head seeks. However, such tight packing does not make sense for large files that require at least a full RTG. If tight packing placement is applied for such files, the VM writeback thread switching between inodes result in the large files to be fragmented, thus increasing the garbage collection penalty later when the RTG needs to be reclaimed. This problem can be avoided with a simple heuristic: if the size of the inode being written back is at least equal to the RTG size, do not use tight-packing. Modify xfs_zoned_pack_tight() to always return false in this case. With this change, a multi-writer workload writing files of 256 MB on a file system backed by an SMR HDD with 256 MB zone size as a realtime device sees all files occupying exactly one RTG (i.e. one device zone), thus completely removing the heavy fragmentation observed without this change. Signed-off-by: Damien Le Moal Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 1147bacb2da8..1b462cd5d8fa 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -614,14 +614,25 @@ static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip) } /* - * Try to pack inodes that are written back after they were closed tight instead - * of trying to open new zones for them or spread them to the least recently - * used zone. This optimizes the data layout for workloads that untar or copy - * a lot of small files. Right now this does not separate multiple such + * Try to tightly pack small files that are written back after they were closed + * instead of trying to open new zones for them or spread them to the least + * recently used zone. This optimizes the data layout for workloads that untar + * or copy a lot of small files. Right now this does not separate multiple such * streams. */ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + size_t zone_capacity = + XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].blocks); + + /* + * Do not pack write files that are already using a full zone to avoid + * fragmentation. + */ + if (i_size_read(VFS_I(ip)) >= zone_capacity) + return false; + return !inode_is_open_for_write(VFS_I(ip)) && !(ip->i_diflags & XFS_DIFLAG_APPEND); } From f5caeb3689ea2d8a8c0790d9eea68b63e8f15496 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 13 Oct 2025 10:48:46 +0200 Subject: [PATCH 0204/1075] xfs: XFS_ONLINE_SCRUB_STATS should depend on DEBUG_FS Currently, XFS_ONLINE_SCRUB_STATS selects DEBUG_FS. However, DEBUG_FS is meant for debugging, and people may want to disable it on production systems. Since commit 0ff51a1fd786f47b ("xfs: enable online fsck by default in Kconfig")), XFS_ONLINE_SCRUB_STATS is enabled by default, forcing DEBUG_FS enabled too. Fix this by replacing the selection of DEBUG_FS by a dependency on DEBUG_FS, which is what most other options controlling the gathering and exposing of statistics do. Signed-off-by: Geert Uytterhoeven Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index d66d517c99a9..b99da294e9a3 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -165,7 +165,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select DEBUG_FS + depends on DEBUG_FS help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number From 179753aa5b7890b311968c033d08f558f0a7be21 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:48 +0200 Subject: [PATCH 0205/1075] drm/panic: Fix drawing the logo on a small narrow screen If the logo width is bigger than the framebuffer width, and the height is big enough to hold the logo and the message, it will draw at x coordinate that are higher than the width, and ends up in a corrupted image. Fixes: 4b570ac2eb54 ("drm/rect: Add drm_rect_overlap()") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-2-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 1d6312fa1429..23ba791c6131 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -429,6 +429,9 @@ static void drm_panic_logo_rect(struct drm_rect *rect, const struct font_desc *f static void drm_panic_logo_draw(struct drm_scanout_buffer *sb, struct drm_rect *rect, const struct font_desc *font, u32 fg_color) { + if (rect->x2 > sb->width || rect->y2 > sb->height) + return; + if (logo_mono) drm_panic_blit(sb, rect, logo_mono->data, DIV_ROUND_UP(drm_rect_width(rect), 8), 1, fg_color); From cfa56e0a0e9b259077b0cb88b431e37dc9a67dee Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:49 +0200 Subject: [PATCH 0206/1075] drm/panic: Fix overlap between qr code and logo The borders of the qr code was not taken into account to check if it overlap with the logo, leading to the logo being partially covered. Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-3-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 23ba791c6131..179cbf21f22d 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -749,7 +749,7 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) /* Fill with the background color, and draw text on top */ drm_panic_fill(sb, &r_screen, bg_color); - if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr)) + if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr_canvas)) drm_panic_logo_draw(sb, &r_logo, font, fg_color); draw_txt_rectangle(sb, font, panic_msg, panic_msg_lines, true, &r_msg, fg_color); From 4fcffb5e5c8c0c8e2ad9c99a22305a0afbecc294 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:50 +0200 Subject: [PATCH 0207/1075] drm/panic: Fix qr_code, ensure vmargin is positive Depending on qr_code size and screen size, the vertical margin can be negative, that means there is not enough room to draw the qr_code. So abort early, to avoid a segfault by trying to draw at negative coordinates. Fixes: cb5164ac43d0f ("drm/panic: Add a QR code panic screen") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-4-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 179cbf21f22d..281bb2dabf81 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -736,7 +736,10 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) pr_debug("QR width %d and scale %d\n", qr_width, scale); r_qr_canvas = DRM_RECT_INIT(0, 0, qr_canvas_width * scale, qr_canvas_width * scale); - v_margin = (sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg)) / 5; + v_margin = sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg); + if (v_margin < 0) + return -ENOSPC; + v_margin /= 5; drm_rect_translate(&r_qr_canvas, (sb->width - r_qr_canvas.x2) / 2, 2 * v_margin); r_qr = DRM_RECT_INIT(r_qr_canvas.x1 + QR_MARGIN * scale, r_qr_canvas.y1 + QR_MARGIN * scale, From e9b36fe0630046e61224216dc92513a69f72b5f0 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:51 +0200 Subject: [PATCH 0208/1075] drm/panic: Fix kmsg text drawing rectangle The rectangle height was larger than the screen size. This has no real impact. Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-5-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 281bb2dabf81..69be9d835ccf 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -480,7 +480,7 @@ static int draw_line_with_wrap(struct drm_scanout_buffer *sb, const struct font_ struct drm_panic_line *line, int yoffset, u32 fg_color) { int chars_per_row = sb->width / font->width; - struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, sb->height); + struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, font->height); struct drm_panic_line line_wrap; if (line->len > chars_per_row) { From 2e337dd278c6c38982b520c309f36e0f88696e6e Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:52 +0200 Subject: [PATCH 0209/1075] drm/panic: Fix divide by 0 if the screen width < font width In the unlikely case that the screen is tiny, and smaller than the font width, it leads to a divide by 0: draw_line_with_wrap() chars_per_row = sb->width / font->width = 0 line_wrap.len = line->len % chars_per_row; This will trigger a divide by 0 Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-6-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 69be9d835ccf..bc5158683b2b 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -523,7 +523,7 @@ static void draw_panic_static_kmsg(struct drm_scanout_buffer *sb) struct drm_panic_line line; int yoffset; - if (!font) + if (!font || font->width > sb->width) return; yoffset = sb->height - font->height - (sb->height % font->height) / 2; From 23437509a69476d4f896891032d62ac868731668 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:53 +0200 Subject: [PATCH 0210/1075] drm/panic: Fix 24bit pixel crossing page boundaries When using page list framebuffer, and using RGB888 format, some pixels can cross the page boundaries, and this case was not handled, leading to writing 1 or 2 bytes on the next virtual address. Add a check and a specific function to handle this case. Fixes: c9ff2808790f0 ("drm/panic: Add support to scanout buffer as array of pages") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-7-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 46 +++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index bc5158683b2b..d4b6ea42db0f 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -174,6 +174,33 @@ static void drm_panic_write_pixel24(void *vaddr, unsigned int offset, u32 color) *p = color & 0xff; } +/* + * Special case if the pixel crosses page boundaries + */ +static void drm_panic_write_pixel24_xpage(void *vaddr, struct page *next_page, + unsigned int offset, u32 color) +{ + u8 *vaddr2; + u8 *p = vaddr + offset; + + vaddr2 = kmap_local_page_try_from_panic(next_page); + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 1) + p = vaddr2; + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 2) + p = vaddr2; + + *p = color & 0xff; + kunmap_local(vaddr2); +} + static void drm_panic_write_pixel32(void *vaddr, unsigned int offset, u32 color) { u32 *p = vaddr + offset; @@ -231,7 +258,14 @@ static void drm_panic_blit_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - if (vaddr) + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, fg32); + else drm_panic_write_pixel(vaddr, offset, fg32, cpp); } } @@ -321,7 +355,15 @@ static void drm_panic_fill_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - drm_panic_write_pixel(vaddr, offset, color, cpp); + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, color); + else + drm_panic_write_pixel(vaddr, offset, color, cpp); } } if (vaddr) From a8c861f401b4b2f8feda282abff929fa91c1f73a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2025 15:29:30 +0900 Subject: [PATCH 0211/1075] xfs: avoid busy loops in GCD When GCD has no new work to handle, but read, write or reset commands are outstanding, it currently busy loops, which is a bit suboptimal, and can lead to softlockup warnings in case of stuck commands. Change the code so that the task state is only set to running when work is performed, which looks a bit tricky due to the design of the reading/writing/resetting lists that contain both in-flight and finished commands. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 81 +++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 064cd1a857a0..109877d9a6bf 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -491,21 +491,6 @@ xfs_zone_gc_select_victim( struct xfs_rtgroup *victim_rtg = NULL; unsigned int bucket; - if (xfs_is_shutdown(mp)) - return false; - - if (iter->victim_rtg) - return true; - - /* - * Don't start new work if we are asked to stop or park. - */ - if (kthread_should_stop() || kthread_should_park()) - return false; - - if (!xfs_zoned_need_gc(mp)) - return false; - spin_lock(&zi->zi_used_buckets_lock); for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) { victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket); @@ -975,6 +960,27 @@ xfs_zone_gc_reset_zones( } while (next); } +static bool +xfs_zone_gc_should_start_new_work( + struct xfs_zone_gc_data *data) +{ + if (xfs_is_shutdown(data->mp)) + return false; + if (!xfs_zone_gc_space_available(data)) + return false; + + if (!data->iter.victim_rtg) { + if (kthread_should_stop() || kthread_should_park()) + return false; + if (!xfs_zoned_need_gc(data->mp)) + return false; + if (!xfs_zone_gc_select_victim(data)) + return false; + } + + return true; +} + /* * Handle the work to read and write data for GC and to reset the zones, * including handling all completions. @@ -982,7 +988,7 @@ xfs_zone_gc_reset_zones( * Note that the order of the chunks is preserved so that we don't undo the * optimal order established by xfs_zone_gc_query(). */ -static bool +static void xfs_zone_gc_handle_work( struct xfs_zone_gc_data *data) { @@ -996,30 +1002,22 @@ xfs_zone_gc_handle_work( zi->zi_reset_list = NULL; spin_unlock(&zi->zi_reset_list_lock); - if (!xfs_zone_gc_select_victim(data) || - !xfs_zone_gc_space_available(data)) { - if (list_empty(&data->reading) && - list_empty(&data->writing) && - list_empty(&data->resetting) && - !reset_list) - return false; - } - - __set_current_state(TASK_RUNNING); - try_to_freeze(); - - if (reset_list) + if (reset_list) { + set_current_state(TASK_RUNNING); xfs_zone_gc_reset_zones(data, reset_list); + } list_for_each_entry_safe(chunk, next, &data->resetting, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_reset(chunk); } list_for_each_entry_safe(chunk, next, &data->writing, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_chunk(chunk); } @@ -1027,15 +1025,18 @@ xfs_zone_gc_handle_work( list_for_each_entry_safe(chunk, next, &data->reading, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_write_chunk(chunk); } blk_finish_plug(&plug); - blk_start_plug(&plug); - while (xfs_zone_gc_start_chunk(data)) - ; - blk_finish_plug(&plug); - return true; + if (xfs_zone_gc_should_start_new_work(data)) { + set_current_state(TASK_RUNNING); + blk_start_plug(&plug); + while (xfs_zone_gc_start_chunk(data)) + ; + blk_finish_plug(&plug); + } } /* @@ -1059,8 +1060,18 @@ xfs_zoned_gcd( for (;;) { set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE); xfs_set_zonegc_running(mp); - if (xfs_zone_gc_handle_work(data)) + + xfs_zone_gc_handle_work(data); + + /* + * Only sleep if nothing set the state to running. Else check for + * work again as someone might have queued up more work and woken + * us in the meantime. + */ + if (get_current_state() == TASK_RUNNING) { + try_to_freeze(); continue; + } if (list_empty(&data->reading) && list_empty(&data->writing) && From ca3d643a970139f5456f90dd555a0955752d70cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Oct 2025 05:55:41 +0200 Subject: [PATCH 0212/1075] xfs: cache open zone in inode->i_private The MRU cache for open zones is unfortunately still not ideal, as it can time out pretty easily when doing heavy I/O to hard disks using up most or all open zones. One option would be to just increase the timeout, but while looking into that I realized we're just better off caching it indefinitely as there is no real downside to that once we don't hold a reference to the cache open zone. So switch the open zone to RCU freeing, and then stash the last used open zone into inode->i_private. This helps to significantly reduce fragmentation by keeping I/O localized to zones for workloads that write using many open files to HDD. Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.h | 1 - fs/xfs/xfs_super.c | 6 ++ fs/xfs/xfs_zone_alloc.c | 129 ++++++++++++++-------------------------- fs/xfs/xfs_zone_priv.h | 2 + 4 files changed, 53 insertions(+), 85 deletions(-) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f046d1215b04..b871dfde372b 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -236,7 +236,6 @@ typedef struct xfs_mount { bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; unsigned int m_zonegc_low_space; - struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */ /* max_atomic_write mount option value */ unsigned long long m_awu_max_bytes; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e85a156dc17d..464ae1e657d9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -786,6 +786,12 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); + + if (IS_ENABLED(CONFIG_XFS_RT) && + S_ISREG(inode->i_mode) && inode->i_private) { + xfs_open_zone_put(inode->i_private); + inode->i_private = NULL; + } } static void diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 1b462cd5d8fa..23cdab4515bb 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -26,14 +26,22 @@ #include "xfs_trace.h" #include "xfs_mru_cache.h" +static void +xfs_open_zone_free_rcu( + struct callback_head *cb) +{ + struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu); + + xfs_rtgroup_rele(oz->oz_rtg); + kfree(oz); +} + void xfs_open_zone_put( struct xfs_open_zone *oz) { - if (atomic_dec_and_test(&oz->oz_ref)) { - xfs_rtgroup_rele(oz->oz_rtg); - kfree(oz); - } + if (atomic_dec_and_test(&oz->oz_ref)) + call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu); } static inline uint32_t @@ -756,98 +764,55 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -/* - * Cache the last zone written to for an inode so that it is considered first - * for subsequent writes. - */ -struct xfs_zone_cache_item { - struct xfs_mru_cache_elem mru; - struct xfs_open_zone *oz; -}; - -static inline struct xfs_zone_cache_item * -xfs_zone_cache_item(struct xfs_mru_cache_elem *mru) -{ - return container_of(mru, struct xfs_zone_cache_item, mru); -} - -static void -xfs_zone_cache_free_func( - void *data, - struct xfs_mru_cache_elem *mru) -{ - struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru); - - xfs_open_zone_put(item->oz); - kfree(item); -} - /* * Check if we have a cached last open zone available for the inode and * if yes return a reference to it. */ static struct xfs_open_zone * -xfs_cached_zone( - struct xfs_mount *mp, - struct xfs_inode *ip) +xfs_get_cached_zone( + struct xfs_inode *ip) { - struct xfs_mru_cache_elem *mru; - struct xfs_open_zone *oz; + struct xfs_open_zone *oz; - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (!mru) - return NULL; - oz = xfs_zone_cache_item(mru)->oz; + rcu_read_lock(); + oz = VFS_I(ip)->i_private; if (oz) { /* * GC only steals open zones at mount time, so no GC zones * should end up in the cache. */ ASSERT(!oz->oz_is_gc); - ASSERT(atomic_read(&oz->oz_ref) > 0); - atomic_inc(&oz->oz_ref); + if (!atomic_inc_not_zero(&oz->oz_ref)) + oz = NULL; } - xfs_mru_cache_done(mp->m_zone_cache); + rcu_read_unlock(); + return oz; } /* - * Update the last used zone cache for a given inode. + * Stash our zone in the inode so that is is reused for future allocations. * - * The caller must have a reference on the open zone. + * The open_zone structure will be pinned until either the inode is freed or + * until the cached open zone is replaced with a different one because the + * current one was full when we tried to use it. This means we keep any + * open zone around forever as long as any inode that used it for the last + * write is cached, which slightly increases the memory use of cached inodes + * that were every written to, but significantly simplifies the cached zone + * lookup. Because the open_zone is clearly marked as full when all data + * in the underlying RTG was written, the caching is always safe. */ static void -xfs_zone_cache_create_association( - struct xfs_inode *ip, - struct xfs_open_zone *oz) +xfs_set_cached_zone( + struct xfs_inode *ip, + struct xfs_open_zone *oz) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_zone_cache_item *item = NULL; - struct xfs_mru_cache_elem *mru; + struct xfs_open_zone *old_oz; - ASSERT(atomic_read(&oz->oz_ref) > 0); atomic_inc(&oz->oz_ref); - - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (mru) { - /* - * If we have an association already, update it to point to the - * new zone. - */ - item = xfs_zone_cache_item(mru); - xfs_open_zone_put(item->oz); - item->oz = oz; - xfs_mru_cache_done(mp->m_zone_cache); - return; - } - - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - xfs_open_zone_put(oz); - return; - } - item->oz = oz; - xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); + old_oz = xchg(&VFS_I(ip)->i_private, oz); + if (old_oz) + xfs_open_zone_put(old_oz); } static void @@ -891,15 +856,14 @@ xfs_zone_alloc_and_submit( * the inode is still associated with a zone and use that if so. */ if (!*oz) - *oz = xfs_cached_zone(mp, ip); + *oz = xfs_get_cached_zone(ip); if (!*oz) { select_zone: *oz = xfs_select_zone(mp, write_hint, pack_tight); if (!*oz) goto out_error; - - xfs_zone_cache_create_association(ip, *oz); + xfs_set_cached_zone(ip, *oz); } alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size), @@ -977,6 +941,12 @@ xfs_free_open_zones( xfs_open_zone_put(oz); } spin_unlock(&zi->zi_open_zones_lock); + + /* + * Wait for all open zones to be freed so that they drop the group + * references: + */ + rcu_barrier(); } struct xfs_init_zones { @@ -1290,14 +1260,6 @@ xfs_mount_zones( error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; - - /* - * Set up a mru cache to track inode to open zone for data placement - * purposes. The magic values for group count and life time is the - * same as the defaults for file streams, which seems sane enough. - */ - xfs_mru_cache_create(&mp->m_zone_cache, mp, - 5000, 10, xfs_zone_cache_free_func); return 0; out_free_zone_info: @@ -1311,5 +1273,4 @@ xfs_unmount_zones( { xfs_zone_gc_unmount(mp); xfs_free_zone_info(mp->m_zone_info); - xfs_mru_cache_destroy(mp->m_zone_cache); } diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index 35e6de3d25ed..4322e26dd99a 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -44,6 +44,8 @@ struct xfs_open_zone { * the life time of an open zone. */ struct xfs_rtgroup *oz_rtg; + + struct rcu_head oz_rcu; }; /* From 0f41997b1b2b769b73415512d2afaae80630e4fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Oct 2025 12:12:48 +0200 Subject: [PATCH 0213/1075] xfs: don't use __GFP_NOFAIL in xfs_init_fs_context With enough debug options enabled, struct xfs_mount is larger than 4k and thus NOFAIL allocations won't work for it. xfs_init_fs_context is early in the mount process, and if we really are out of memory there we'd better give up ASAP anyway. Fixes: 7b77b46a6137 ("xfs: use kmem functions for struct xfs_mount") Reported-by: syzbot+359a67b608de1ef72f65@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 464ae1e657d9..9d51186b24dd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2227,7 +2227,7 @@ xfs_init_fs_context( struct xfs_mount *mp; int i; - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL); + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; From c4d35e635f3a65aec291a6045cae8c99cede5bba Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:44 +0900 Subject: [PATCH 0214/1075] gpio: 104-idio-16: Define maximum valid register address offset Attempting to load the 104-idio-16 module fails during regmap initialization with a return error -EINVAL. This is a result of the regmap cache failing initialization. Set the idio_16_regmap_config max_register member to fix this failure. Fixes: 2c210c9a34a3 ("gpio: 104-idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Mark Cave-Ayland Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-1-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-104-idio-16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c index ffe7e1cb6b23..fe5c10cd5c32 100644 --- a/drivers/gpio/gpio-104-idio-16.c +++ b/drivers/gpio/gpio-104-idio-16.c @@ -59,6 +59,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x5, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, From d37623132a6347b4ab9e2179eb3f2fa77863c364 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:45 +0900 Subject: [PATCH 0215/1075] gpio: pci-idio-16: Define maximum valid register address offset Attempting to load the pci-idio-16 module fails during regmap initialization with a return error -EINVAL. This is a result of the regmap cache failing initialization. Set the idio_16_regmap_config max_register member to fix this failure. Fixes: 73d8f3efc5c2 ("gpio: pci-idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Mark Cave-Ayland Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-2-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pci-idio-16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 476cea1b5ed7..9d28ca8e1d6f 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -41,6 +41,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x7, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, From 876f0d43af78639790bee0e57b39d498ae35adcf Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 20 Oct 2025 15:41:24 +0100 Subject: [PATCH 0216/1075] x86/microcode: Fix Entrysign revision check for Zen1/Naples ... to match AMD's statement here: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7033.html Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://patch.msgid.link/20251020144124.2930784-1-andrew.cooper3@citrix.com --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index cdce885e2fd5..28ed8c089024 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -194,7 +194,7 @@ static bool need_sha_check(u32 cur_rev) } switch (cur_rev >> 8) { - case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80012: return cur_rev <= 0x8001277; break; case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; From 49d34f3dd8519581030547eb7543a62f9ab5fa08 Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 16 Oct 2025 17:27:55 +0530 Subject: [PATCH 0217/1075] net: ethernet: ti: am65-cpts: fix timestamp loss due to race conditions Resolve race conditions in timestamp events list handling between TX and RX paths causing missed timestamps. The current implementation uses a single events list for both TX and RX timestamps. The am65_cpts_find_ts() function acquires the lock, splices all events (TX as well as RX events) to a temporary list, and releases the lock. This function performs matching of timestamps for TX packets only. Before it acquires the lock again to put the non-TX events back to the main events list, a concurrent RX processing thread could acquire the lock (as observed in practice), find an empty events list, and fail to attach timestamp to it, even though a relevant event exists in the spliced list which is yet to be restored to the main list. Fix this by creating separate events lists to handle TX and RX timestamps independently. Fixes: c459f606f66df ("net: ethernet: ti: am65-cpts: Enable RX HW timestamp for PTP packets using CPTS FIFO") Signed-off-by: Aksh Garg Reviewed-by: Siddharth Vadapalli Link: https://patch.msgid.link/20251016115755.1123646-1-a-garg7@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/am65-cpts.c | 63 ++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c index 59d6ab989c55..8ffbfaa3ab18 100644 --- a/drivers/net/ethernet/ti/am65-cpts.c +++ b/drivers/net/ethernet/ti/am65-cpts.c @@ -163,7 +163,9 @@ struct am65_cpts { struct device_node *clk_mux_np; struct clk *refclk; u32 refclk_freq; - struct list_head events; + /* separate lists to handle TX and RX timestamp independently */ + struct list_head events_tx; + struct list_head events_rx; struct list_head pool; struct am65_cpts_event pool_data[AM65_CPTS_MAX_EVENTS]; spinlock_t lock; /* protects events lists*/ @@ -227,6 +229,24 @@ static void am65_cpts_disable(struct am65_cpts *cpts) am65_cpts_write32(cpts, 0, int_enable); } +static int am65_cpts_purge_event_list(struct am65_cpts *cpts, + struct list_head *events) +{ + struct list_head *this, *next; + struct am65_cpts_event *event; + int removed = 0; + + list_for_each_safe(this, next, events) { + event = list_entry(this, struct am65_cpts_event, list); + if (time_after(jiffies, event->tmo)) { + list_del_init(&event->list); + list_add(&event->list, &cpts->pool); + ++removed; + } + } + return removed; +} + static int am65_cpts_event_get_port(struct am65_cpts_event *event) { return (event->event1 & AM65_CPTS_EVENT_1_PORT_NUMBER_MASK) >> @@ -239,20 +259,12 @@ static int am65_cpts_event_get_type(struct am65_cpts_event *event) AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT; } -static int am65_cpts_cpts_purge_events(struct am65_cpts *cpts) +static int am65_cpts_purge_events(struct am65_cpts *cpts) { - struct list_head *this, *next; - struct am65_cpts_event *event; int removed = 0; - list_for_each_safe(this, next, &cpts->events) { - event = list_entry(this, struct am65_cpts_event, list); - if (time_after(jiffies, event->tmo)) { - list_del_init(&event->list); - list_add(&event->list, &cpts->pool); - ++removed; - } - } + removed += am65_cpts_purge_event_list(cpts, &cpts->events_tx); + removed += am65_cpts_purge_event_list(cpts, &cpts->events_rx); if (removed) dev_dbg(cpts->dev, "event pool cleaned up %d\n", removed); @@ -287,7 +299,7 @@ static int __am65_cpts_fifo_read(struct am65_cpts *cpts) struct am65_cpts_event, list); if (!event) { - if (am65_cpts_cpts_purge_events(cpts)) { + if (am65_cpts_purge_events(cpts)) { dev_err(cpts->dev, "cpts: event pool empty\n"); ret = -1; goto out; @@ -306,11 +318,21 @@ static int __am65_cpts_fifo_read(struct am65_cpts *cpts) cpts->timestamp); break; case AM65_CPTS_EV_RX: + event->tmo = jiffies + + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT); + + list_move_tail(&event->list, &cpts->events_rx); + + dev_dbg(cpts->dev, + "AM65_CPTS_EV_RX e1:%08x e2:%08x t:%lld\n", + event->event1, event->event2, + event->timestamp); + break; case AM65_CPTS_EV_TX: event->tmo = jiffies + msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT); - list_move_tail(&event->list, &cpts->events); + list_move_tail(&event->list, &cpts->events_tx); dev_dbg(cpts->dev, "AM65_CPTS_EV_TX e1:%08x e2:%08x t:%lld\n", @@ -828,7 +850,7 @@ static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts, return found; } -static void am65_cpts_find_ts(struct am65_cpts *cpts) +static void am65_cpts_find_tx_ts(struct am65_cpts *cpts) { struct am65_cpts_event *event; struct list_head *this, *next; @@ -837,7 +859,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts) LIST_HEAD(events); spin_lock_irqsave(&cpts->lock, flags); - list_splice_init(&cpts->events, &events); + list_splice_init(&cpts->events_tx, &events); spin_unlock_irqrestore(&cpts->lock, flags); list_for_each_safe(this, next, &events) { @@ -850,7 +872,7 @@ static void am65_cpts_find_ts(struct am65_cpts *cpts) } spin_lock_irqsave(&cpts->lock, flags); - list_splice_tail(&events, &cpts->events); + list_splice_tail(&events, &cpts->events_tx); list_splice_tail(&events_free, &cpts->pool); spin_unlock_irqrestore(&cpts->lock, flags); } @@ -861,7 +883,7 @@ static long am65_cpts_ts_work(struct ptp_clock_info *ptp) unsigned long flags; long delay = -1; - am65_cpts_find_ts(cpts); + am65_cpts_find_tx_ts(cpts); spin_lock_irqsave(&cpts->txq.lock, flags); if (!skb_queue_empty(&cpts->txq)) @@ -905,7 +927,7 @@ static u64 am65_cpts_find_rx_ts(struct am65_cpts *cpts, u32 skb_mtype_seqid) spin_lock_irqsave(&cpts->lock, flags); __am65_cpts_fifo_read(cpts); - list_for_each_safe(this, next, &cpts->events) { + list_for_each_safe(this, next, &cpts->events_rx) { event = list_entry(this, struct am65_cpts_event, list); if (time_after(jiffies, event->tmo)) { list_move(&event->list, &cpts->pool); @@ -1155,7 +1177,8 @@ struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs, return ERR_PTR(ret); mutex_init(&cpts->ptp_clk_lock); - INIT_LIST_HEAD(&cpts->events); + INIT_LIST_HEAD(&cpts->events_tx); + INIT_LIST_HEAD(&cpts->events_rx); INIT_LIST_HEAD(&cpts->pool); spin_lock_init(&cpts->lock); skb_queue_head_init(&cpts->txq); From 204ced4108f5d38f6804968fd9543cc69c3f8da6 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Fri, 3 Oct 2025 12:19:36 -0500 Subject: [PATCH 0218/1075] x86/bugs: Qualify RETBLEED_INTEL_MSG When retbleed mitigation is disabled, the kernel already prints an info message that the system is vulnerable. Recent code restructuring also inadvertently led to RETBLEED_INTEL_MSG being printed as an error, which is unnecessary as retbleed mitigation was already explicitly disabled (by config option, cmdline, etc.). Qualify this print statement so the warning is not printed unless an actual retbleed mitigation was selected and is being disabled due to incompatibility with spectre_v2. Fixes: e3b78a7ad5ea ("x86/bugs: Restructure retbleed mitigation") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220624 Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20251003171936.155391-1-david.kaplan@amd.com --- arch/x86/kernel/cpu/bugs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6a526ae1fe99..e08de5b0d20b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1463,7 +1463,9 @@ static void __init retbleed_update_mitigation(void) break; default: if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) { - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_NONE) + pr_err(RETBLEED_INTEL_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } From a7b17ece4032dd86bb411297f2169dda395cdc3c Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 10 Oct 2025 15:38:56 +0200 Subject: [PATCH 0219/1075] mmc: wmt-sdmmc: fix compile test default Enabling compile testing should not enable every individual driver (we have "allyesconfig" for that). Fixes: 7cd8db0fb0b2 ("mmc: add COMPILE_TEST to multiple drivers") Cc: Mikko Rapeli Signed-off-by: Johan Hovold Signed-off-by: Ulf Hansson --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 2c963cb6724b..10d0ef58ef49 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -950,7 +950,7 @@ config MMC_USHC config MMC_WMT tristate "Wondermedia SD/MMC Host Controller support" depends on ARCH_VT8500 || COMPILE_TEST - default y + default ARCH_VT8500 help This selects support for the SD/MMC Host Controller on Wondermedia WM8505/WM8650 based SoCs. From 0778ac7df5137d5041783fadfc201f8fd55a1d9b Mon Sep 17 00:00:00 2001 From: Zhen Ni Date: Mon, 13 Oct 2025 19:41:51 +0800 Subject: [PATCH 0220/1075] fs: Fix uninitialized 'offp' in statmount_string() In statmount_string(), most flags assign an output offset pointer (offp) which is later updated with the string offset. However, the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP cases directly set the struct fields instead of using offp. This leaves offp uninitialized, leading to a possible uninitialized dereference when *offp is updated. Fix it by assigning offp for UIDMAP and GIDMAP as well, keeping the code path consistent. Fixes: 37c4a9590e1e ("statmount: allow to retrieve idmappings") Fixes: e52e97f09fb6 ("statmount: let unset strings be empty") Cc: stable@vger.kernel.org Signed-off-by: Zhen Ni Link: https://patch.msgid.link/20251013114151.664341-1-zhen.ni@easystack.cn Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/namespace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index d82910f33dc4..5b5ab2ae238b 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5454,11 +5454,11 @@ static int statmount_string(struct kstatmount *s, u64 flag) ret = statmount_sb_source(s, seq); break; case STATMOUNT_MNT_UIDMAP: - sm->mnt_uidmap = start; + offp = &sm->mnt_uidmap; ret = statmount_mnt_uidmap(s, seq); break; case STATMOUNT_MNT_GIDMAP: - sm->mnt_gidmap = start; + offp = &sm->mnt_gidmap; ret = statmount_mnt_gidmap(s, seq); break; default: From 2c2b67af5f5f77fc68261a137ad65dcfb8e52506 Mon Sep 17 00:00:00 2001 From: Hongbo Li Date: Sat, 11 Oct 2025 09:22:35 +0000 Subject: [PATCH 0221/1075] hostfs: Fix only passing host root in boot stage with new mount In the old mount proceedure, hostfs could only pass root directory during boot. This is because it constructed the root directory using the @root_ino event without any mount options. However, when using it with the new mount API, this step is no longer triggered. As a result, if users mounts without specifying any mount options, the @host_root_path remains uninitialized. To prevent this issue, the @host_root_path should be initialized at the time of allocation. Reported-by: Geoffrey Thorpe Closes: https://lore.kernel.org/all/643333a0-f434-42fb-82ac-d25a0b56f3b7@geoffthorpe.net/ Fixes: cd140ce9f611 ("hostfs: convert hostfs to use the new mount API") Signed-off-by: Hongbo Li Link: https://patch.msgid.link/20251011092235.29880-1-lihongbo22@huawei.com Signed-off-by: Christian Brauner --- fs/hostfs/hostfs_kern.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 1e1acf5775ab..86455eebbf6c 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -979,7 +979,7 @@ static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct hostfs_fs_info *fsi = fc->s_fs_info; struct fs_parse_result result; - char *host_root; + char *host_root, *tmp_root; int opt; opt = fs_parse(fc, hostfs_param_specs, param, &result); @@ -990,11 +990,13 @@ static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_hostfs: host_root = param->string; if (!*host_root) - host_root = ""; - fsi->host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); - if (fsi->host_root_path == NULL) + break; + tmp_root = kasprintf(GFP_KERNEL, "%s%s", + fsi->host_root_path, host_root); + if (!tmp_root) return -ENOMEM; + kfree(fsi->host_root_path); + fsi->host_root_path = tmp_root; break; } @@ -1004,17 +1006,17 @@ static int hostfs_parse_param(struct fs_context *fc, struct fs_parameter *param) static int hostfs_parse_monolithic(struct fs_context *fc, void *data) { struct hostfs_fs_info *fsi = fc->s_fs_info; - char *host_root = (char *)data; + char *tmp_root, *host_root = (char *)data; /* NULL is printed as '(null)' by printf(): avoid that. */ if (host_root == NULL) - host_root = ""; + return 0; - fsi->host_root_path = - kasprintf(GFP_KERNEL, "%s/%s", root_ino, host_root); - if (fsi->host_root_path == NULL) + tmp_root = kasprintf(GFP_KERNEL, "%s%s", fsi->host_root_path, host_root); + if (!tmp_root) return -ENOMEM; - + kfree(fsi->host_root_path); + fsi->host_root_path = tmp_root; return 0; } @@ -1049,6 +1051,11 @@ static int hostfs_init_fs_context(struct fs_context *fc) if (!fsi) return -ENOMEM; + fsi->host_root_path = kasprintf(GFP_KERNEL, "%s/", root_ino); + if (!fsi->host_root_path) { + kfree(fsi); + return -ENOMEM; + } fc->s_fs_info = fsi; fc->ops = &hostfs_context_ops; return 0; From 6ed8bfd24ce1cb31742b09a3eb557cd008533eec Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Tue, 21 Oct 2025 09:03:53 +0800 Subject: [PATCH 0222/1075] slab: Avoid race on slab->obj_exts in alloc_slab_obj_exts If two competing threads enter alloc_slab_obj_exts() and one of them fails to allocate the object extension vector, it might override the valid slab->obj_exts allocated by the other thread with OBJEXTS_ALLOC_FAIL. This will cause the thread that lost this race and expects a valid pointer to dereference a NULL pointer later on. Update slab->obj_exts atomically using cmpxchg() to avoid slab->obj_exts overrides by racing threads. Thanks for Vlastimil and Suren's help with debugging. Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally") Cc: Suggested-by: Suren Baghdasaryan Signed-off-by: Hao Ge Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Link: https://patch.msgid.link/20251021010353.1187193-1-hao.ge@linux.dev Signed-off-by: Vlastimil Babka --- mm/slub.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a8fcc7e6f25a..23d8f54e9486 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2054,7 +2054,7 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) static inline void mark_failed_objexts_alloc(struct slab *slab) { - slab->obj_exts = OBJEXTS_ALLOC_FAIL; + cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL); } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, @@ -2136,6 +2136,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, #ifdef CONFIG_MEMCG new_exts |= MEMCG_DATA_OBJEXTS; #endif +retry: old_exts = READ_ONCE(slab->obj_exts); handle_failed_objexts_alloc(old_exts, vec, objects); if (new_slab) { @@ -2145,8 +2146,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, * be simply assigned. */ slab->obj_exts = new_exts; - } else if ((old_exts & ~OBJEXTS_FLAGS_MASK) || - cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + } else if (old_exts & ~OBJEXTS_FLAGS_MASK) { /* * If the slab is already in use, somebody can allocate and * assign slabobj_exts in parallel. In this case the existing @@ -2158,6 +2158,9 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, else kfree(vec); return 0; + } else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + /* Retry if a racing thread changed slab->obj_exts from under us. */ + goto retry; } if (allow_spin) From 89939cf252d80237ed380c1d20575ecfe56ff894 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 29 Sep 2025 14:28:34 -0400 Subject: [PATCH 0223/1075] drm/amd/display: Fix NULL pointer dereference [Why] On a mst branch with multi display setup, dc context is obselete after updating the first stream. Referencing the same dc context for the next stream update to fetch dc pointer leads to NULL pointer dereference. [How] Get the dc pointer from the link rather than context. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Charlene Liu Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit dc69b48988b171d6ccb3a083607e4dff015e2c0d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 9e33bf937a69..2676ae9f6fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -78,6 +78,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, struct audio_output audio_output[MAX_PIPES]; struct dc_stream_state *streams_on_link[MAX_PIPES]; int num_streams_on_link = 0; + struct dc *dc = (struct dc *)link->dc; needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); @@ -150,7 +151,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { stream_update.stream = streams_on_link[i]; stream_update.dpms_off = &dpms_off; - dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update); + dc_update_planes_and_stream(dc, NULL, 0, streams_on_link[i], &stream_update); } } } From bec947cbe9a65783adb475a5fb47980d7b4f4796 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 29 Sep 2025 20:29:30 -0400 Subject: [PATCH 0224/1075] drm/amd/display: increase max link count and fix link->enc NULL pointer access [why] 1.) dc->links[MAX_LINKS] array size smaller than actual requested. max_connector + max_dpia + 4 virtual = 14. increase from 12 to 14. 2.) hw_init() access null LINK_ENC for dpia non display_endpoint. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit d7f5a61e1b04ed87b008c8d327649d184dc5bb45) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 7c276c319086..ce3d0b45fb4c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -200,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; + if (link->ep_type != DISPLAY_ENDPOINT_PHY) + continue; + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 41c76ba9ba56..62a39204fe0b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -44,7 +44,13 @@ */ #define MAX_PIPES 6 #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) -#define MAX_LINKS (MAX_PIPES * 2 +2) + +#define MAX_DPIA 6 +#define MAX_CONNECTOR 6 +#define MAX_VIRTUAL_LINKS 4 + +#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) + #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 From 72a1eb3cf573ab957ae412f0efb0cf6ff0876234 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 25 Sep 2025 10:23:59 -0400 Subject: [PATCH 0225/1075] drm/amd/display: use GFP_NOWAIT for allocation in interrupt handler schedule_dc_vmin_vmax() is called by dm_crtc_high_irq(). Hence, we cannot have the former sleep. Use GFP_NOWAIT for allocation in this function. Fixes: c210b757b400 ("drm/amd/display: fix dmub access race condition") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng (Leo) Li Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit c04812cbe2f247a1c1e53a9b6c5e659963fe4065) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6597475e245d..bfa3199591b6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -551,13 +551,13 @@ static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust) { - struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT); if (!offload_work) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); return; } - struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_KERNEL); + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT); if (!adjust_copy) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); kfree(offload_work); From 90c82941adf1986364e0f82c35cf59f2bf5f6a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Draszik?= Date: Thu, 16 Oct 2025 16:58:37 +0100 Subject: [PATCH 0226/1075] pmdomain: samsung: plug potential memleak during probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit of_genpd_add_provider_simple() could fail, in which case this code leaks the domain name, pd->pd.name. Use devm_kstrdup_const() to plug this leak. As a side-effect, we can simplify existing error handling. Fixes: c09a3e6c97f0 ("soc: samsung: pm_domains: Convert to regular platform driver") Cc: stable@vger.kernel.org Reviewed-by: Peter Griffin Reviewed-by: Krzysztof Kozlowski Signed-off-by: André Draszik Tested-by: Marek Szyprowski Signed-off-by: Ulf Hansson --- drivers/pmdomain/samsung/exynos-pm-domains.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/pmdomain/samsung/exynos-pm-domains.c b/drivers/pmdomain/samsung/exynos-pm-domains.c index 5d478bb37ad6..f53e1bd24798 100644 --- a/drivers/pmdomain/samsung/exynos-pm-domains.c +++ b/drivers/pmdomain/samsung/exynos-pm-domains.c @@ -92,13 +92,14 @@ static const struct of_device_id exynos_pm_domain_of_match[] = { { }, }; -static const char *exynos_get_domain_name(struct device_node *node) +static const char *exynos_get_domain_name(struct device *dev, + struct device_node *node) { const char *name; if (of_property_read_string(node, "label", &name) < 0) name = kbasename(node->full_name); - return kstrdup_const(name, GFP_KERNEL); + return devm_kstrdup_const(dev, name, GFP_KERNEL); } static int exynos_pd_probe(struct platform_device *pdev) @@ -115,15 +116,13 @@ static int exynos_pd_probe(struct platform_device *pdev) if (!pd) return -ENOMEM; - pd->pd.name = exynos_get_domain_name(np); + pd->pd.name = exynos_get_domain_name(dev, np); if (!pd->pd.name) return -ENOMEM; pd->base = of_iomap(np, 0); - if (!pd->base) { - kfree_const(pd->pd.name); + if (!pd->base) return -ENODEV; - } pd->pd.power_off = exynos_pd_power_off; pd->pd.power_on = exynos_pd_power_on; From 6e3a4754717a74e931a9f00b5f953be708e07acb Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Tue, 21 Oct 2025 17:28:25 +0800 Subject: [PATCH 0227/1075] ACPICA: Work around bogus -Wstringop-overread warning since GCC 11 When ACPI_MISALIGNMENT_NOT_SUPPORTED is set, GCC can produce a bogus -Wstringop-overread warning, see [1]. To me, it's very clear that we have a compiler bug here, thus just disable the warning. Fixes: a9d13433fe17 ("LoongArch: Align ACPI structures if ARCH_STRICT_ALIGN enabled") Link: https://lore.kernel.org/all/899f2dec-e8b9-44f4-ab8d-001e160a2aed@roeck-us.net/ Link: https://github.com/acpica/acpica/commit/abf5b573 Link: https://gcc.gnu.org/PR122073 [1] Co-developed-by: Saket Dumbre Signed-off-by: Saket Dumbre Signed-off-by: Xi Ruoyao Acked-by: Huacai Chen Cc: All applicable [ rjw: Subject and changelog edits ] Link: https://patch.msgid.link/20251021092825.822007-1-xry111@xry111.site Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/tbprint.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/acpica/tbprint.c b/drivers/acpi/acpica/tbprint.c index 049f6c2f1e32..e5631027f7f1 100644 --- a/drivers/acpi/acpica/tbprint.c +++ b/drivers/acpi/acpica/tbprint.c @@ -95,6 +95,11 @@ acpi_tb_print_table_header(acpi_physical_address address, { struct acpi_table_header local_header; +#pragma GCC diagnostic push +#if defined(__GNUC__) && __GNUC__ >= 11 +#pragma GCC diagnostic ignored "-Wstringop-overread" +#endif + if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_FACS)) { /* FACS only has signature and length fields */ @@ -143,4 +148,5 @@ acpi_tb_print_table_header(acpi_physical_address address, local_header.asl_compiler_id, local_header.asl_compiler_revision)); } +#pragma GCC diagnostic pop } From 143937ca51cc6ae2fccc61a1cb916abb24cd34f5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 15 Oct 2025 10:37:12 +0800 Subject: [PATCH 0228/1075] arm64, mm: avoid always making PTE dirty in pte_mkwrite() Current pte_mkwrite_novma() makes PTE dirty unconditionally. This may mark some pages that are never written dirty wrongly. For example, do_swap_page() may map the exclusive pages with writable and clean PTEs if the VMA is writable and the page fault is for read access. However, current pte_mkwrite_novma() implementation always dirties the PTE. This may cause unnecessary disk writing if the pages are never written before being reclaimed. So, change pte_mkwrite_novma() to clear the PTE_RDONLY bit only if the PTE_DIRTY bit is set to make it possible to make the PTE writable and clean. The current behavior was introduced in commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"). Before that, pte_mkwrite() only sets the PTE_WRITE bit, while set_pte_at() only clears the PTE_RDONLY bit if both the PTE_WRITE and the PTE_DIRTY bits are set. To test the performance impact of the patch, on an arm64 server machine, run 16 redis-server processes on socket 1 and 16 memtier_benchmark processes on socket 0 with mostly get transactions (that is, redis-server will mostly read memory only). The memory footprint of redis-server is larger than the available memory, so swap out/in will be triggered. Test results show that the patch can avoid most swapping out because the pages are mostly clean. And the benchmark throughput improves ~23.9% in the test. Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Signed-off-by: Huang Ying Cc: Will Deacon Cc: Anshuman Khandual Cc: Ryan Roberts Cc: Gavin Shan Cc: Ard Biesheuvel Cc: Matthew Wilcox (Oracle) Cc: Yicong Yang Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..0944e296dd4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -293,7 +293,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (pte_sw_dirty(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); return pte; } From 7c3643f204edf1c5edb12b36b34838683ee5f8dc Mon Sep 17 00:00:00 2001 From: Shuai Xue Date: Sat, 13 Sep 2025 10:32:24 +0800 Subject: [PATCH 0229/1075] acpi,srat: Fix incorrect device handle check for Generic Initiator The Generic Initiator Affinity Structure in SRAT table uses device handle type field to indicate the device type. According to ACPI specification, the device handle type value of 1 represents PCI device, not 0. Fixes: 894c26a1c274 ("ACPI: Support Generic Initiator only domains") Reported-by: Wu Zongyong Signed-off-by: Shuai Xue Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250913023224.39281-1-xueshuai@linux.alibaba.com Signed-off-by: Dave Jiang --- drivers/acpi/numa/srat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 53816dfab645..aa87ee1583a4 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -237,7 +237,7 @@ acpi_table_print_srat_entry(struct acpi_subtable_header *header) struct acpi_srat_generic_affinity *p = (struct acpi_srat_generic_affinity *)header; - if (p->device_handle_type == 0) { + if (p->device_handle_type == 1) { /* * For pci devices this may be the only place they * are assigned a proximity domain From 4f76435fd517981f01608678c06ad9718a86ee98 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Sep 2025 12:53:40 -0400 Subject: [PATCH 0230/1075] NFSD: Define actions for the new time_deleg FATTR4 attributes NFSv4 clients won't send legitimate GETATTR requests for these new attributes because they are intended to be used only with CB_GETATTR and SETATTR. But NFSD has to do something besides crashing if it ever sees a GETATTR request that queries these attributes. RFC 8881 Section 18.7.3 states: > The server MUST return a value for each attribute that the client > requests if the attribute is supported by the server for the > target file system. If the server does not support a particular > attribute on the target file system, then it MUST NOT return the > attribute value and MUST NOT set the attribute bit in the result > bitmap. The server MUST return an error if it supports an > attribute on the target but cannot obtain its value. In that case, > no attribute values will be returned. Further, RFC 9754 Section 5 states: > These new attributes are invalid to be used with GETATTR, VERIFY, > and NVERIFY, and they can only be used with CB_GETATTR and SETATTR > by a client holding an appropriate delegation. Thus there does not appear to be a specific server response mandated by specification. Taking the guidance that querying these attributes via GETATTR is "invalid", NFSD will return nfserr_inval, failing the request entirely. Reported-by: Robert Morris Closes: https://lore.kernel.org/linux-nfs/7819419cf0cb50d8130dc6b747765d2b8febc88a.camel@kernel.org/T/#t Fixes: 51c0d4f7e317 ("nfsd: add support for FATTR4_OPEN_ARGUMENTS") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c0a3c6a7c8bb..8f5ee3014abc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2939,6 +2939,12 @@ struct nfsd4_fattr_args { typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args); +static __be32 nfsd4_encode_fattr4__inval(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfserr_inval; +} + static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { @@ -3560,6 +3566,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, + [FATTR4_TIME_DELEG_ACCESS] = nfsd4_encode_fattr4__inval, + [FATTR4_TIME_DELEG_MODIFY] = nfsd4_encode_fattr4__inval, [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments, }; From abb1f08a2121dd270193746e43b2a9373db9ad84 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 30 Sep 2025 10:05:20 -0400 Subject: [PATCH 0231/1075] NFSD: Fix crash in nfsd4_read_release() When tracing is enabled, the trace_nfsd_read_done trace point crashes during the pynfs read.testNoFh test. Fixes: 15a8b55dbb1b ("nfsd: call op_release, even when op_func returns an error") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e466cf52d7d7..f9aeefc0da73 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -988,10 +988,11 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { - if (u->read.rd_nf) + if (u->read.rd_nf) { + trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, + u->read.rd_offset, u->read.rd_length); nfsd_file_put(u->read.rd_nf); - trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, - u->read.rd_offset, u->read.rd_length); + } } static __be32 From 29cdfb4950702bb849f70f7e3b58b4eeb5c1441c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Sep 2025 11:31:34 -0700 Subject: [PATCH 0232/1075] nfsd: Avoid strlen conflict in nfsd4_encode_components_esc() There is an error building nfs4xdr.c with CONFIG_SUNRPC_DEBUG_TRACE=y and CONFIG_FORTIFY_SOURCE=n due to the local variable strlen conflicting with the function strlen(): In file included from include/linux/cpumask.h:11, from arch/x86/include/asm/paravirt.h:21, from arch/x86/include/asm/irqflags.h:102, from include/linux/irqflags.h:18, from include/linux/spinlock.h:59, from include/linux/mmzone.h:8, from include/linux/gfp.h:7, from include/linux/slab.h:16, from fs/nfsd/nfs4xdr.c:37: fs/nfsd/nfs4xdr.c: In function 'nfsd4_encode_components_esc': include/linux/kernel.h:321:46: error: called object 'strlen' is not a function or function pointer 321 | __trace_puts(_THIS_IP_, str, strlen(str)); \ | ^~~~~~ include/linux/kernel.h:265:17: note: in expansion of macro 'trace_puts' 265 | trace_puts(fmt); \ | ^~~~~~~~~~ include/linux/sunrpc/debug.h:34:41: note: in expansion of macro 'trace_printk' 34 | # define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~ include/linux/sunrpc/debug.h:42:17: note: in expansion of macro '__sunrpc_printk' 42 | __sunrpc_printk(fmt, ##__VA_ARGS__); \ | ^~~~~~~~~~~~~~~ include/linux/sunrpc/debug.h:25:9: note: in expansion of macro 'dfprintk' 25 | dfprintk(FACILITY, fmt, ##__VA_ARGS__) | ^~~~~~~~ fs/nfsd/nfs4xdr.c:2646:9: note: in expansion of macro 'dprintk' 2646 | dprintk("nfsd4_encode_components(%s)\n", components); | ^~~~~~~ fs/nfsd/nfs4xdr.c:2643:13: note: declared here 2643 | int strlen, count=0; | ^~~~~~ This dprintk() instance is not particularly useful, so just remove it altogether to get rid of the immediate strlen() conflict. At the same time, eliminate the local strlen variable to avoid potential conflicts with strlen() in the future. Fixes: ec7d8e68ef0e ("sunrpc: add a Kconfig option to redirect dfprintk() output to trace buffer") Signed-off-by: Nathan Chancellor Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8f5ee3014abc..b689b792c21f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2628,10 +2628,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, __be32 *p; __be32 pathlen; int pathlen_offset; - int strlen, count=0; char *str, *end, *next; - - dprintk("nfsd4_encode_components(%s)\n", components); + int count = 0; pathlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); @@ -2658,9 +2656,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, for (; *end && (*end != sep); end++) /* find sep or end of string */; - strlen = end - str; - if (strlen) { - if (xdr_stream_encode_opaque(xdr, str, strlen) < 0) + if (end > str) { + if (xdr_stream_encode_opaque(xdr, str, end - str) < 0) return nfserr_resource; count++; } else From 3e7f011c255582d7c914133785bbba1990441713 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 2 Oct 2025 10:00:51 -0400 Subject: [PATCH 0233/1075] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" I've found that pynfs COMP6 now leaves the connection or lease in a strange state, which causes CLOSE9 to hang indefinitely. I've dug into it a little, but I haven't been able to root-cause it yet. However, I bisected to commit 48aab1606fa8 ("NFSD: Remove the cap on number of operations per NFSv4 COMPOUND"). Tianshuo Han also reports a potential vulnerability when decoding an NFSv4 COMPOUND. An attacker can place an arbitrarily large op count in the COMPOUND header, which results in: [ 51.410584] nfsd: vmalloc error: size 1209533382144, exceeds total pages, mode:0xdc0(GFP_KERNEL|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0 when NFSD attempts to allocate the COMPOUND op array. Let's restore the operation-per-COMPOUND limit, but increased to 200 for now. Reported-by: tianshuo han Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Tested-by: Tianshuo Han Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 14 ++++++++++++-- fs/nfsd/nfs4state.c | 1 + fs/nfsd/nfs4xdr.c | 4 +++- fs/nfsd/nfsd.h | 3 +++ fs/nfsd/xdr4.h | 1 + 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f9aeefc0da73..7f7e6bb23a90 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2893,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; + if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { + /* If there are still more operations to process, + * stop here and report NFS4ERR_RESOURCE. */ + if (cstate->minorversion == 0 && + args->client_opcnt > resp->opcnt) { + op->status = nfserr_resource; + goto encode_op; + } + } + /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2973,7 +2983,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = op->status; } - trace_nfsd_compound_status(args->opcnt, resp->opcnt, + trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81fa7cc6c77b..c1b54322c412 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3902,6 +3902,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->headerpadsz = 0; ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b689b792c21f..6040a6145dad 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) + if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) return false; + argp->opcnt = min_t(u32, argp->client_opcnt, + NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index ea87b42894dd..f19320018639 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -57,6 +57,9 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 200 + struct nfsd_genl_rqstp { struct sockaddr rq_daddr; struct sockaddr rq_saddr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index d4b48602b2b0..ee0570cbdd9e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -903,6 +903,7 @@ struct nfsd4_compoundargs { char * tag; u32 taglen; u32 minorversion; + u32 client_opcnt; u32 opcnt; bool splice_ok; struct nfsd4_op *ops; From 4e9077638301816a7d73fa1e1b4c1db4a7e3b59c Mon Sep 17 00:00:00 2001 From: Noorain Eqbal Date: Mon, 20 Oct 2025 23:33:01 +0530 Subject: [PATCH 0234/1075] bpf: Sync pending IRQ work before freeing ring buffer Fix a race where irq_work can be queued in bpf_ringbuf_commit() but the ring buffer is freed before the work executes. In the syzbot reproducer, a BPF program attached to sched_switch triggers bpf_ringbuf_commit(), queuing an irq_work. If the ring buffer is freed before this work executes, the irq_work thread may accesses freed memory. Calling `irq_work_sync(&rb->work)` ensures that all pending irq_work complete before freeing the buffer. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: syzbot+2617fc732430968b45d2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2617fc732430968b45d2 Tested-by: syzbot+2617fc732430968b45d2@syzkaller.appspotmail.com Signed-off-by: Noorain Eqbal Link: https://lore.kernel.org/r/20251020180301.103366-1-nooraineqbal@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 719d73299397..d706c4b7f532 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -216,6 +216,8 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr) static void bpf_ringbuf_free(struct bpf_ringbuf *rb) { + irq_work_sync(&rb->work); + /* copy pages pointer and nr_pages to local variable, as we are going * to unmap rb itself with vunmap() below */ From 0e59f47c15cec4cd88c51c5cda749607b719c82b Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 13 Oct 2025 17:58:36 +0100 Subject: [PATCH 0235/1075] mm/mremap: correctly account old mapping after MREMAP_DONTUNMAP remap Commit b714ccb02a76 ("mm/mremap: complete refactor of move_vma()") mistakenly introduced a new behaviour - clearing the VM_ACCOUNT flag of the old mapping when a mapping is mremap()'d with the MREMAP_DONTUNMAP flag set. While we always clear the VM_LOCKED and VM_LOCKONFAULT flags for the old mapping (the page tables have been moved, so there is no data that could possibly be locked in memory), there is no reason to touch any other VMA flags. This is because after the move the old mapping is in a state as if it were freshly mapped. This implies that the attributes of the mapping ought to remain the same, including whether or not the mapping is accounted. Link: https://lkml.kernel.org/r/20251013165836.273113-1-lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Fixes: b714ccb02a76 ("mm/mremap: complete refactor of move_vma()") Reviewed-by: Pedro Falcato Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/mremap.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/mm/mremap.c b/mm/mremap.c index 35de0a7b910e..bd7314898ec5 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -1237,10 +1237,10 @@ static int copy_vma_and_data(struct vma_remap_struct *vrm, } /* - * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() and - * account flags on remaining VMA by convention (it cannot be mlock()'d any - * longer, as pages in range are no longer mapped), and removing anon_vma_chain - * links from it (if the entire VMA was copied over). + * Perform final tasks for MADV_DONTUNMAP operation, clearing mlock() flag on + * remaining VMA by convention (it cannot be mlock()'d any longer, as pages in + * range are no longer mapped), and removing anon_vma_chain links from it if the + * entire VMA was copied over. */ static void dontunmap_complete(struct vma_remap_struct *vrm, struct vm_area_struct *new_vma) @@ -1250,11 +1250,8 @@ static void dontunmap_complete(struct vma_remap_struct *vrm, unsigned long old_start = vrm->vma->vm_start; unsigned long old_end = vrm->vma->vm_end; - /* - * We always clear VM_LOCKED[ONFAULT] | VM_ACCOUNT on the old - * vma. - */ - vm_flags_clear(vrm->vma, VM_LOCKED_MASK | VM_ACCOUNT); + /* We always clear VM_LOCKED[ONFAULT] on the old VMA. */ + vm_flags_clear(vrm->vma, VM_LOCKED_MASK); /* * anon_vma links of the old vma is no longer needed after its page From c3fa5b1bfd8380d935fa961f2ac166bdf000f418 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Tue, 14 Oct 2025 13:59:36 -0700 Subject: [PATCH 0236/1075] mm/damon/core: fix list_add_tail() call on damon_call() Each damon_ctx maintains callback requests using a linked list (damon_ctx->call_controls). When a new callback request is received via damon_call(), the new request should be added to the list. However, the function is making a mistake at list_add_tail() invocation: putting the new item to add and the list head to add it before, in the opposite order. Because of the linked list manipulation implementation, the new request can still be reached from the context's list head. But the list items that were added before the new request are dropped from the list. As a result, the callbacks are unexpectedly not invocated. Worse yet, if the dropped callback requests were dynamically allocated, the memory is leaked. Actually DAMON sysfs interface is using a dynamically allocated repeat-mode callback request for automatic essential stats update. And because the online DAMON parameters commit is using a non-repeat-mode callback request, the issue can easily be reproduced, like below. # damo start --damos_action stat --refresh_stat 1s # damo tune --damos_action stat --refresh_stat 1s The first command dynamically allocates the repeat-mode callback request for automatic essential stat update. Users can see the essential stats are automatically updated for every second, using the sysfs interface. The second command calls damon_commit() with a new callback request that was made for the commit. As a result, the previously added repeat-mode callback request is dropped from the list. The automatic stats refresh stops working, and the memory for the repeat-mode callback request is leaked. It can be confirmed using kmemleak. Fix the mistake on the list_add_tail() call. Link: https://lkml.kernel.org/r/20251014205939.1206-1-sj@kernel.org Fixes: 004ded6bee11 ("mm/damon: accept parallel damon_call() requests") Signed-off-by: SeongJae Park Cc: [6.17+] Signed-off-by: Andrew Morton --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 93848b4c6944..4670d293bbf4 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -1450,7 +1450,7 @@ int damon_call(struct damon_ctx *ctx, struct damon_call_control *control) INIT_LIST_HEAD(&control->list); mutex_lock(&ctx->call_controls_lock); - list_add_tail(&ctx->call_controls, &control->list); + list_add_tail(&control->list, &ctx->call_controls); mutex_unlock(&ctx->call_controls_lock); if (!damon_is_running(ctx)) return -EINVAL; From 4ba5a8a7faa647ada8eae61a36517cf369f5bbe4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 14 Oct 2025 14:44:55 +0200 Subject: [PATCH 0237/1075] vmw_balloon: indicate success when effectively deflating during migration When migrating a balloon page, we first deflate the old page to then inflate the new page. However, if inflating the new page succeeded, we effectively deflated the old page, reducing the balloon size. In that case, the migration actually worked: similar to migrating+ immediately deflating the new page. The old page will be freed back to the buddy. Right now, the core will leave the page be marked as isolated (as we returned an error). When later trying to putback that page, we will run into the WARN_ON_ONCE() in balloon_page_putback(). That handling was changed in commit 3544c4faccb8 ("mm/balloon_compaction: stop using __ClearPageMovable()"); before that change, we would have tolerated that way of handling it. To fix it, let's just return 0 in that case, making the core effectively just clear the "isolated" flag + freeing it back to the buddy as if the migration succeeded. Note that the new page will also get freed when the core puts the last reference. Note that this also makes it all be more consistent: we will no longer unisolate the page in the balloon driver while keeping it marked as being isolated in migration core. This was found by code inspection. Link: https://lkml.kernel.org/r/20251014124455.478345-1-david@redhat.com Fixes: 3544c4faccb8 ("mm/balloon_compaction: stop using __ClearPageMovable()") Signed-off-by: David Hildenbrand Cc: Jerrin Shaji George Cc: Broadcom internal kernel review list Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Signed-off-by: Andrew Morton --- drivers/misc/vmw_balloon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 6df51ee8db62..cc1d18b3df5c 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -1737,7 +1737,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, { unsigned long status, flags; struct vmballoon *b; - int ret; + int ret = 0; b = container_of(b_dev_info, struct vmballoon, b_dev_info); @@ -1796,17 +1796,15 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, * A failure happened. While we can deflate the page we just * inflated, this deflation can also encounter an error. Instead * we will decrease the size of the balloon to reflect the - * change and report failure. + * change. */ atomic64_dec(&b->size); - ret = -EBUSY; } else { /* * Success. Take a reference for the page, and we will add it to * the list after acquiring the lock. */ get_page(newpage); - ret = 0; } /* Update the balloon list under the @pages_lock */ @@ -1817,7 +1815,7 @@ static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, * If we succeed just insert it to the list and update the statistics * under the lock. */ - if (!ret) { + if (status == VMW_BALLOON_SUCCESS) { balloon_page_insert(&b->b_dev_info, newpage); __count_vm_event(BALLOON_MIGRATE); } From cec944dd329fbefee907da95c298719d900d4787 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Tue, 14 Oct 2025 17:03:44 +0530 Subject: [PATCH 0238/1075] hugetlbfs: move lock assertions after early returns in huge_pmd_unshare() When hugetlb_vmdelete_list() processes VMAs during truncate operations, it may encounter VMAs where huge_pmd_unshare() is called without the required shareable lock. This triggers an assertion failure in hugetlb_vma_assert_locked(). The previous fix in commit dd83609b8898 ("hugetlbfs: skip VMAs without shareable locks in hugetlb_vmdelete_list") skipped entire VMAs without shareable locks to avoid the assertion. However, this prevented pages from being unmapped and freed, causing a regression in fallocate(PUNCH_HOLE) operations where pages were not freed immediately, as reported by Mark Brown. Instead of checking locks in the caller or skipping VMAs, move the lock assertions in huge_pmd_unshare() to after the early return checks. The assertions are only needed when actual PMD unsharing work will be performed. If the function returns early because sz != PMD_SIZE or the PMD is not shared, no locks are required and assertions should not fire. This approach reverts the VMA skipping logic from commit dd83609b8898 ("hugetlbfs: skip VMAs without shareable locks in hugetlb_vmdelete_list") while moving the assertions to avoid the assertion failure, keeping all the logic within huge_pmd_unshare() itself and allowing page unmapping and freeing to proceed for all VMAs. Link: https://lkml.kernel.org/r/20251014113344.21194-1-kartikey406@gmail.com Fixes: dd83609b8898 ("hugetlbfs: skip VMAs without shareable locks in hugetlb_vmdelete_list") Signed-off-by: Deepanshu Kartikey Reported-by: Reported-by: Mark Brown Closes: https://syzkaller.appspot.com/bug?extid=f26d7c75c26ec19790e7 Suggested-by: David Hildenbrand Suggested-by: Oscar Salvador Tested-by: Acked-by: David Hildenbrand Signed-off-by: Andrew Morton --- fs/hugetlbfs/inode.c | 9 --------- mm/hugetlb.c | 5 ++--- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9c94ed8c3ab0..f42548ee9083 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -478,14 +478,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end, if (!hugetlb_vma_trylock_write(vma)) continue; - /* - * Skip VMAs without shareable locks. Per the design in commit - * 40549ba8f8e0, these will be handled by remove_inode_hugepages() - * called after this function with proper locking. - */ - if (!__vma_shareable_lock(vma)) - goto skip; - v_start = vma_offset_start(vma, start); v_end = vma_offset_end(vma, end); @@ -496,7 +488,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end, * vmas. Therefore, lock is not held when calling * unmap_hugepage_range for private vmas. */ -skip: hugetlb_vma_unlock_write(vma); } } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 795ee393eac0..0455119716ec 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7614,13 +7614,12 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, p4d_t *p4d = p4d_offset(pgd, addr); pud_t *pud = pud_offset(p4d, addr); - i_mmap_assert_write_locked(vma->vm_file->f_mapping); - hugetlb_vma_assert_locked(vma); if (sz != PMD_SIZE) return 0; if (!ptdesc_pmd_is_shared(virt_to_ptdesc(ptep))) return 0; - + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + hugetlb_vma_assert_locked(vma); pud_clear(pud); /* * Once our caller drops the rmap lock, some other process might be From 7071537159be845a5c4ed5fb7d3db25aa4bd04a3 Mon Sep 17 00:00:00 2001 From: Enze Li Date: Tue, 14 Oct 2025 16:42:25 +0800 Subject: [PATCH 0239/1075] mm/damon/core: fix potential memory leak by cleaning ops_filter in damon_destroy_scheme Currently, damon_destroy_scheme() only cleans up the filter list but leaves ops_filter untouched, which could lead to memory leaks when a scheme is destroyed. This patch ensures both filter and ops_filter are properly freed in damon_destroy_scheme(), preventing potential memory leaks. Link: https://lkml.kernel.org/r/20251014084225.313313-1-lienze@kylinos.cn Fixes: ab82e57981d0 ("mm/damon/core: introduce damos->ops_filters") Signed-off-by: Enze Li Reviewed-by: SeongJae Park Tested-by: SeongJae Park Cc: Signed-off-by: Andrew Morton --- mm/damon/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/damon/core.c b/mm/damon/core.c index 4670d293bbf4..083d314fc4e6 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -452,6 +452,9 @@ void damon_destroy_scheme(struct damos *s) damos_for_each_filter_safe(f, next, s) damos_destroy_filter(f); + damos_for_each_ops_filter_safe(f, next, s) + damos_destroy_filter(f); + kfree(s->migrate_dests.node_id_arr); kfree(s->migrate_dests.weight_arr); damon_del_scheme(s); From 7eca961dd7188f20fdf8ce9ed5018280f79b2438 Mon Sep 17 00:00:00 2001 From: SeongJae Park Date: Mon, 13 Oct 2025 17:18:44 -0700 Subject: [PATCH 0240/1075] mm/damon/core: use damos_commit_quota_goal() for new goal commit When damos_commit_quota_goals() is called for adding new DAMOS quota goals of DAMOS_QUOTA_USER_INPUT metric, current_value fields of the new goals should be also set as requested. However, damos_commit_quota_goals() is not updating the field for the case, since it is setting only metrics and target values using damos_new_quota_goal(), and metric-optional union fields using damos_commit_quota_goal_union(). As a result, users could see the first current_value parameter that committed online with a new quota goal is ignored. Users are assumed to commit the current_value for DAMOS_QUOTA_USER_INPUT quota goals, since it is being used as a feedback. Hence the real impact would be subtle. That said, this is obviously not intended behavior. Fix the issue by using damos_commit_quota_goal() which sets all quota goal parameters, instead of damos_commit_quota_goal_union(), which sets only the union fields. Link: https://lkml.kernel.org/r/20251014001846.279282-1-sj@kernel.org Fixes: 1aef9df0ee90 ("mm/damon/core: commit damos_quota_goal->nid") Signed-off-by: SeongJae Park Cc: [6.16+] Signed-off-by: Andrew Morton --- mm/damon/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/damon/core.c b/mm/damon/core.c index 083d314fc4e6..109b050c795a 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -835,7 +835,7 @@ int damos_commit_quota_goals(struct damos_quota *dst, struct damos_quota *src) src_goal->metric, src_goal->target_value); if (!new_goal) return -ENOMEM; - damos_commit_quota_goal_union(new_goal, src_goal); + damos_commit_quota_goal(new_goal, src_goal); damos_add_quota_goal(dst, new_goal); } return 0; From 9aa12167ef1149d9980713b120ddcb31cf17222d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 6 Oct 2025 14:13:37 +0200 Subject: [PATCH 0241/1075] csky: abiv2: adapt to new folio flags field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent changes require the raw folio flags to be accessed via ".f". The merge commit introducing this change adapted most architecture code but forgot the csky abiv2. [rppt@kernel.org: add fix for arch/csky/abiv2/cacheflush.c] Link: https://lkml.kernel.org/r/aPCE238oxAB9QcZa@kernel.org Fixes: 53fbef56e07d ("mm: introduce memdesc_flags_t") Signed-off-by: Thomas Weißschuh Signed-off-by: Mike Rapoport (Microsoft) Acked-by: Guo Ren Acked-by: Zi Yan Cc: Guo Ren Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- arch/csky/abiv2/cacheflush.c | 2 +- arch/csky/abiv2/inc/abi/cacheflush.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index 876028b1083f..064b0f0f95ca 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -21,7 +21,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, folio = page_folio(pfn_to_page(pfn)); - if (test_and_set_bit(PG_dcache_clean, &folio->flags)) + if (test_and_set_bit(PG_dcache_clean, &folio->flags.f)) return; icache_inv_range(address, address + nr*PAGE_SIZE); diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h index 6513ac5d2578..da51a0f02391 100644 --- a/arch/csky/abiv2/inc/abi/cacheflush.h +++ b/arch/csky/abiv2/inc/abi/cacheflush.h @@ -20,8 +20,8 @@ static inline void flush_dcache_folio(struct folio *folio) { - if (test_bit(PG_dcache_clean, &folio->flags)) - clear_bit(PG_dcache_clean, &folio->flags); + if (test_bit(PG_dcache_clean, &folio->flags.f)) + clear_bit(PG_dcache_clean, &folio->flags.f); } #define flush_dcache_folio flush_dcache_folio From e13d315ae077bb7c3c6027cc292401bc0f4ec683 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 17 Oct 2025 15:05:38 +0800 Subject: [PATCH 0242/1075] erofs: avoid infinite loops due to corrupted subpage compact indexes Robert reported an infinite loop observed by two crafted images. The root cause is that `clusterofs` can be larger than `lclustersize` for !NONHEAD `lclusters` in corrupted subpage compact indexes, e.g.: blocksize = lclustersize = 512 lcn = 6 clusterofs = 515 Move the corresponding check for full compress indexes to `z_erofs_load_lcluster_from_disk()` to also cover subpage compact compress indexes. It also fixes the position of `m->type >= Z_EROFS_LCLUSTER_TYPE_MAX` check, since it should be placed right after `z_erofs_load_{compact,full}_lcluster()`. Fixes: 8d2517aaeea3 ("erofs: fix up compacted indexes for block size < 4096") Fixes: 1a5223c182fd ("erofs: do sanity check on m->type in z_erofs_load_compact_lcluster()") Reported-by: Robert Morris Closes: https://lore.kernel.org/r/35167.1760645886@localhost Reviewed-by: Hongbo Li Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 8007814f721e..08ec7b0e50a8 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -55,10 +55,6 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, } else { m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF); m->clusterofs = le16_to_cpu(di->di_clusterofs); - if (m->clusterofs >= 1 << vi->z_lclusterbits) { - DBG_BUGON(1); - return -EFSCORRUPTED; - } m->pblk = le32_to_cpu(di->di_u.blkaddr); } return 0; @@ -240,21 +236,29 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m, unsigned int lcn, bool lookahead) { + struct erofs_inode *vi = EROFS_I(m->inode); + int err; + + if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) { + err = z_erofs_load_compact_lcluster(m, lcn, lookahead); + } else { + DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL); + err = z_erofs_load_full_lcluster(m, lcn); + } + if (err) + return err; + if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) { erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu", - m->type, lcn, EROFS_I(m->inode)->nid); + m->type, lcn, EROFS_I(m->inode)->nid); DBG_BUGON(1); return -EOPNOTSUPP; + } else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD && + m->clusterofs >= (1 << vi->z_lclusterbits)) { + DBG_BUGON(1); + return -EFSCORRUPTED; } - - switch (EROFS_I(m->inode)->datalayout) { - case EROFS_INODE_COMPRESSED_FULL: - return z_erofs_load_full_lcluster(m, lcn); - case EROFS_INODE_COMPRESSED_COMPACT: - return z_erofs_load_compact_lcluster(m, lcn, lookahead); - default: - return -EINVAL; - } + return 0; } static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, From 2a13fc417f493e28bdd368785320dd4c2b3d732e Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 17 Oct 2025 15:05:39 +0800 Subject: [PATCH 0243/1075] erofs: consolidate z_erofs_extent_lookback() The initial m.delta[0] also needs to be checked against zero. In addition, also drop the redundant logic that errors out for lcn == 0 / m.delta[0] == 1 case. Signed-off-by: Gao Xiang --- fs/erofs/zmap.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 08ec7b0e50a8..c8d8e129eb4b 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -272,20 +272,19 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, unsigned long lcn = m->lcn - lookback_distance; int err; + if (!lookback_distance) + break; + err = z_erofs_load_lcluster_from_disk(m, lcn, false); if (err) return err; - if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) { lookback_distance = m->delta[0]; - if (!lookback_distance) - break; continue; - } else { - m->headtype = m->type; - m->map->m_la = (lcn << lclusterbits) | m->clusterofs; - return 0; } + m->headtype = m->type; + m->map->m_la = (lcn << lclusterbits) | m->clusterofs; + return 0; } erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu", lookback_distance, m->lcn, vi->nid); @@ -435,13 +434,6 @@ static int z_erofs_map_blocks_fo(struct inode *inode, end = inode->i_size; } else { if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) { - /* m.lcn should be >= 1 if endoff < m.clusterofs */ - if (!m.lcn) { - erofs_err(sb, "invalid logical cluster 0 at nid %llu", - vi->nid); - err = -EFSCORRUPTED; - goto unmap_out; - } end = (m.lcn << lclusterbits) | m.clusterofs; map->m_flags |= EROFS_MAP_FULL_MAPPED; m.delta[0] = 1; From e84cb860ac3ce67ec6ecc364433fd5b412c448bc Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 20 Oct 2025 22:53:26 +0200 Subject: [PATCH 0244/1075] mptcp: pm: in-kernel: C-flag: handle late ADD_ADDR The special C-flag case expects the ADD_ADDR to be received when switching to 'fully-established'. But for various reasons, the ADD_ADDR could be sent after the "4th ACK", and the special case doesn't work. On NIPA, the new test validating this special case for the C-flag failed a few times, e.g. 102 default limits, server deny join id 0 syn rx [FAIL] got 0 JOIN[s] syn rx expected 2 Server ns stats (...) MPTcpExtAddAddrTx 1 MPTcpExtEchoAdd 1 Client ns stats (...) MPTcpExtAddAddr 1 MPTcpExtEchoAddTx 1 synack rx [FAIL] got 0 JOIN[s] synack rx expected 2 ack rx [FAIL] got 0 JOIN[s] ack rx expected 2 join Rx [FAIL] see above syn tx [FAIL] got 0 JOIN[s] syn tx expected 2 join Tx [FAIL] see above I had a suspicion about what the issue could be: the ADD_ADDR might have been received after the switch to the 'fully-established' state. The issue was not easy to reproduce. The packet capture shown that the ADD_ADDR can indeed be sent with a delay, and the client would not try to establish subflows to it as expected. A simple fix is not to mark the endpoints as 'used' in the C-flag case, when looking at creating subflows to the remote initial IP address and port. In this case, there is no need to try. Note: newly added fullmesh endpoints will still continue to be used as expected, thanks to the conditions behind mptcp_pm_add_addr_c_flag_case. Fixes: 4b1ff850e0c1 ("mptcp: pm: in-kernel: usable client side with C-flag") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-1-8207030cb0e8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_kernel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index e0f44dc232aa..2ae95476dba3 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -370,6 +370,10 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) } subflow: + /* No need to try establishing subflows to remote id0 if not allowed */ + if (mptcp_pm_add_addr_c_flag_case(msk)) + goto exit; + /* check if should create a new subflow */ while (msk->pm.local_addr_used < endp_subflow_max && msk->pm.extra_subflows < limit_extra_subflows) { @@ -401,6 +405,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) __mptcp_subflow_connect(sk, &local, &addrs[i]); spin_lock_bh(&msk->pm.lock); } + +exit: mptcp_pm_nl_check_work_pending(msk); } From d68460bc31f9c8c6fc81fbb56ec952bec18409f1 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 20 Oct 2025 22:53:27 +0200 Subject: [PATCH 0245/1075] selftests: mptcp: join: mark 'flush re-add' as skipped if not supported The call to 'continue_if' was missing: it properly marks a subtest as 'skipped' if the attached condition is not valid. Without that, the test is wrongly marked as passed on older kernels. Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-2-8207030cb0e8@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index c90d8e8b95cb..deba21ca5a97 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -4115,7 +4115,7 @@ endpoint_tests() # flush and re-add if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 1 2 # broadcast IP: no packet for this address will be received on ns1 From 973f80d715bd2504b4db6e049f292e694145cd79 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 20 Oct 2025 22:53:28 +0200 Subject: [PATCH 0246/1075] selftests: mptcp: join: mark implicit tests as skipped if not supported The call to 'continue_if' was missing: it properly marks a subtest as 'skipped' if the attached condition is not valid. Without that, the test is wrongly marked as passed on older kernels. Fixes: 36c4127ae8dd ("selftests: mptcp: join: skip implicit tests if not supported") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-3-8207030cb0e8@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index deba21ca5a97..d98f8f8905b9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3939,7 +3939,7 @@ endpoint_tests() # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr if reset "implicit EP" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -3964,7 +3964,7 @@ endpoint_tests() fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then start_events pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 0 3 From c3496c052ac36ea98ec4f8e95ae6285a425a2457 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 20 Oct 2025 22:53:29 +0200 Subject: [PATCH 0247/1075] selftests: mptcp: join: mark 'delete re-add signal' as skipped if not supported The call to 'continue_if' was missing: it properly marks a subtest as 'skipped' if the attached condition is not valid. Without that, the test is wrongly marked as passed on older kernels. Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-4-8207030cb0e8@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index d98f8f8905b9..b2a8c51a3969 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -4040,7 +4040,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 From a9649dfbe552a42a3781fb681d93a2f510565954 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 20 Oct 2025 22:53:30 +0200 Subject: [PATCH 0248/1075] selftests: mptcp: join: mark laminar tests as skipped if not supported The call to 'continue_if' was missing: it properly marks a subtest as 'skipped' if the attached condition is not valid. Without that, the test is wrongly marked as passed on older kernels. Fixes: c912f935a5c7 ("selftests: mptcp: join: validate new laminar endp") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251020-net-mptcp-c-flag-late-add-addr-v1-5-8207030cb0e8@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b2a8c51a3969..78a1aa4ecff2 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2324,7 +2324,7 @@ laminar_endp_tests() { # no laminar endpoints: routing rules are used if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2336,7 +2336,7 @@ laminar_endp_tests() # laminar endpoints: this endpoint is used if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2348,7 +2348,7 @@ laminar_endp_tests() # laminar endpoints: these endpoints are used if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2363,7 +2363,7 @@ laminar_endp_tests() # laminar endpoints: only one endpoint is used if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2376,7 +2376,7 @@ laminar_endp_tests() # laminar endpoints: subflow and laminar flags if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && - mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then pm_nl_set_limits $ns1 0 4 pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal From c5394b8b7a92c5013d2917591e28e938fe7ff2a2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Oct 2025 16:11:14 +0000 Subject: [PATCH 0249/1075] net: gro_cells: fix lock imbalance in gro_cells_receive() syzbot found that the local_unlock_nested_bh() call was missing in some cases. WARNING: possible recursive locking detected syzkaller #0 Not tainted -------------------------------------------- syz.2.329/7421 is trying to acquire lock: ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:44 [inline] ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: gro_cells_receive+0x404/0x790 net/core/gro_cells.c:30 but task is already holding lock: ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: spin_lock include/linux/spinlock_rt.h:44 [inline] ffffe8ffffd48888 ((&cell->bh_lock)){+...}-{3:3}, at: gro_cells_receive+0x404/0x790 net/core/gro_cells.c:30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock((&cell->bh_lock)); lock((&cell->bh_lock)); *** DEADLOCK *** Given the introduction of @have_bh_lock variable, it seems the author intent was to have the local_unlock_nested_bh() after the @unlock label. Fixes: 25718fdcbdd2 ("net: gro_cells: Use nested-BH locking for gro_cell") Reported-by: syzbot+f9651b9a8212e1c8906f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68f65eb9.a70a0220.205af.0034.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Sebastian Andrzej Siewior Reviewed-by: David Ahern Link: https://patch.msgid.link/20251020161114.1891141-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index b43911562f4d..fd57b845de33 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -43,12 +43,11 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) if (skb_queue_len(&cell->napi_skbs) == 1) napi_schedule(&cell->napi); - if (have_bh_lock) - local_unlock_nested_bh(&gcells->cells->bh_lock); - res = NET_RX_SUCCESS; unlock: + if (have_bh_lock) + local_unlock_nested_bh(&gcells->cells->bh_lock); rcu_read_unlock(); return res; } From 86c48f50bababbb45622616b48385aa94bfadf5f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 19 Oct 2025 22:27:16 -0700 Subject: [PATCH 0250/1075] Documentation: networking: ax25: update the mailing list info. Update the mailing list subscription information for the linux-hams mailing list. Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251020052716.3136773-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- Documentation/networking/ax25.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst index 605e72c6c877..89c79dd6c6f9 100644 --- a/Documentation/networking/ax25.rst +++ b/Documentation/networking/ax25.rst @@ -11,6 +11,7 @@ found on https://linux-ax25.in-berlin.de. There is a mailing list for discussing Linux amateur radio matters called linux-hams@vger.kernel.org. To subscribe to it, send a message to -majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body -of the message, the subject field is ignored. You don't need to be -subscribed to post but of course that means you might miss an answer. +linux-hams+subscribe@vger.kernel.org or use the web interface at +https://vger.kernel.org. The subject and body of the message are +ignored. You don't need to be subscribed to post but of course that +means you might miss an answer. From e23ef4f22db30a1e49c8b060e4ebc9dc9ca99c49 Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Wed, 8 Oct 2025 14:55:43 +0800 Subject: [PATCH 0251/1075] scsi: ufs: core: Fix error handler host_sem issue Fix the issue where host_sem is not released due to a new return path in commit f966e02ae521 ("scsi: ufs: core: Fix runtime suspend error deadlock"). Check pm_op_in_progress before acquiring hba->host_sem to prevent deadlocks and ensure proper resource management during error handling. Add comment for use ufshcd_rpm_get_noresume() to safely perform link recovery without interfering with ongoing PM operations. Fixes: f966e02ae521 ("scsi: ufs: core: Fix runtime suspend error deadlock") Reported-by: Dan Carpenter Signed-off-by: Peter Wang Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251008065651.1589614-2-peter.wang@mediatek.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8339fec975b9..8f4e884892a9 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6673,6 +6673,20 @@ static void ufshcd_err_handler(struct work_struct *work) hba->saved_uic_err, hba->force_reset, ufshcd_is_link_broken(hba) ? "; link is broken" : ""); + /* + * Use ufshcd_rpm_get_noresume() here to safely perform link recovery + * even if an error occurs during runtime suspend or runtime resume. + * This avoids potential deadlocks that could happen if we tried to + * resume the device while a PM operation is already in progress. + */ + ufshcd_rpm_get_noresume(hba); + if (hba->pm_op_in_progress) { + ufshcd_link_recovery(hba); + ufshcd_rpm_put(hba); + return; + } + ufshcd_rpm_put(hba); + down(&hba->host_sem); spin_lock_irqsave(hba->host->host_lock, flags); if (ufshcd_err_handling_should_stop(hba)) { @@ -6684,14 +6698,6 @@ static void ufshcd_err_handler(struct work_struct *work) } spin_unlock_irqrestore(hba->host->host_lock, flags); - ufshcd_rpm_get_noresume(hba); - if (hba->pm_op_in_progress) { - ufshcd_link_recovery(hba); - ufshcd_rpm_put(hba); - return; - } - ufshcd_rpm_put(hba); - ufshcd_err_handling_prepare(hba); spin_lock_irqsave(hba->host->host_lock, flags); From 6fe4c679dde3075cb481beb3945269bb2ef8b19a Mon Sep 17 00:00:00 2001 From: Wonkon Kim Date: Mon, 20 Oct 2025 15:15:38 +0900 Subject: [PATCH 0252/1075] scsi: ufs: core: Initialize value of an attribute returned by uic cmd If ufshcd_send_cmd() fails, *mib_val may have a garbage value. It can get an unintended value of an attribute. Make ufshcd_dme_get_attr() always initialize *mib_val. Fixes: 12b4fdb4f6bc ("[SCSI] ufs: add dme configuration primitives") Signed-off-by: Wonkon Kim Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251020061539.28661-2-wkon.kim@samsung.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 8f4e884892a9..498c0c8c0a6b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4282,8 +4282,8 @@ int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel, get, UIC_GET_ATTR_ID(attr_sel), UFS_UIC_COMMAND_RETRIES - retries); - if (mib_val && !ret) - *mib_val = uic_cmd.argument3; + if (mib_val) + *mib_val = ret == 0 ? uic_cmd.argument3 : 0; if (peer && (hba->quirks & UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE) && pwr_mode_change) From 35bc3c8ee319083333b758355ffd739a96af5c00 Mon Sep 17 00:00:00 2001 From: Wonkon Kim Date: Mon, 20 Oct 2025 15:15:39 +0900 Subject: [PATCH 0253/1075] scsi: ufs: core: Declare tx_lanes witout initialization A value of an attribute will be initialized at ufshcd_dme_get_attr(). There is no need to initialize a tx_lanes. Signed-off-by: Wonkon Kim Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251020061539.28661-3-wkon.kim@samsung.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 498c0c8c0a6b..9ca27de4767a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -4999,7 +4999,7 @@ EXPORT_SYMBOL_GPL(ufshcd_hba_enable); static int ufshcd_disable_tx_lcc(struct ufs_hba *hba, bool peer) { - int tx_lanes = 0, i, err = 0; + int tx_lanes, i, err = 0; if (!peer) ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDTXDATALANES), From d54c676d4fe0543d1642ab7a68ffdd31e8639a5d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Oct 2025 15:02:43 -0700 Subject: [PATCH 0254/1075] scsi: core: Fix the unit attention counter implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scsi_decide_disposition() may call scsi_check_sense(). scsi_decide_disposition() calls are not serialized. Hence, counter updates by scsi_check_sense() must be serialized. Hence this patch that makes the counters updated by scsi_check_sense() atomic. Cc: Kai Mäkisara Fixes: a5d518cd4e3e ("scsi: core: Add counters for New Media and Power On/Reset UNIT ATTENTIONs") Signed-off-by: Bart Van Assche Reviewed-by: Ewan D. Milne Link: https://patch.msgid.link/20251014220244.3689508-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_error.c | 4 ++-- include/scsi/scsi_device.h | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 746ff6a1f309..1c13812a3f03 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -554,9 +554,9 @@ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) * happened, even if someone else gets the sense data. */ if (sshdr.asc == 0x28) - scmd->device->ua_new_media_ctr++; + atomic_inc(&sdev->ua_new_media_ctr); else if (sshdr.asc == 0x29) - scmd->device->ua_por_ctr++; + atomic_inc(&sdev->ua_por_ctr); } if (scsi_sense_is_deferred(&sshdr)) diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 6d6500148c4b..993008cdea65 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -252,8 +252,8 @@ struct scsi_device { unsigned int queue_stopped; /* request queue is quiesced */ bool offline_already; /* Device offline message logged */ - unsigned int ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ - unsigned int ua_por_ctr; /* Counter for Power On / Reset UAs */ + atomic_t ua_new_media_ctr; /* Counter for New Media UNIT ATTENTIONs */ + atomic_t ua_por_ctr; /* Counter for Power On / Reset UAs */ atomic_t disk_events_disable_depth; /* disable depth for disk events */ @@ -693,10 +693,8 @@ static inline int scsi_device_busy(struct scsi_device *sdev) } /* Macros to access the UNIT ATTENTION counters */ -#define scsi_get_ua_new_media_ctr(sdev) \ - ((const unsigned int)(sdev->ua_new_media_ctr)) -#define scsi_get_ua_por_ctr(sdev) \ - ((const unsigned int)(sdev->ua_por_ctr)) +#define scsi_get_ua_new_media_ctr(sdev) atomic_read(&sdev->ua_new_media_ctr) +#define scsi_get_ua_por_ctr(sdev) atomic_read(&sdev->ua_por_ctr) #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") From 881a9c9cb7856b24e390fad9f59acfd73b98b3b2 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Tue, 21 Oct 2025 14:27:58 +0200 Subject: [PATCH 0255/1075] bpf: Do not audit capability check in do_jit() The failure of this check only results in a security mitigation being applied, slightly affecting performance of the compiled BPF program. It doesn't result in a failed syscall, an thus auditing a failed LSM permission check for it is unwanted. For example with SELinux, it causes a denial to be reported for confined processes running as root, which tends to be flagged as a problem to be fixed in the policy. Yet dontauditing or allowing CAP_SYS_ADMIN to the domain may not be desirable, as it would allow/silence also other checks - either going against the principle of least privilege or making debugging potentially harder. Fix it by changing it from capable() to ns_capable_noaudit(), which instructs the LSMs to not audit the resulting denials. Link: https://bugzilla.redhat.com/show_bug.cgi?id=2369326 Fixes: d4e89d212d40 ("x86/bpf: Call branch history clearing sequence on exit") Signed-off-by: Ondrej Mosnacek Reviewed-by: Paul Moore Link: https://lore.kernel.org/r/20251021122758.2659513-1-omosnace@redhat.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d4c93d9e73e4..de5083cb1d37 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2701,7 +2701,7 @@ st: if (is_imm8(insn->off)) /* Update cleanup_addr */ ctx->cleanup_addr = proglen; if (bpf_prog_was_classic(bpf_prog) && - !capable(CAP_SYS_ADMIN)) { + !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) { u8 *ip = image + addrs[i - 1]; if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) From 5523508258d390fd55b25c1c0f99c092f23a39e9 Mon Sep 17 00:00:00 2001 From: Yeounsu Moon Date: Sun, 19 Oct 2025 16:55:40 +0900 Subject: [PATCH 0256/1075] net: dlink: use dev_kfree_skb_any instead of dev_kfree_skb Replace `dev_kfree_skb()` with `dev_kfree_skb_any()` in `start_xmit()` which can be called from netpoll (hard IRQ) and from other contexts. Also, `np->link_status` can be changed at any time by interrupt handler. -0 [011] dNh4. 4541.754603: start_xmit <-netpoll_start_xmit -0 [011] dNh4. 4541.754622: => [FTRACE TRAMPOLINE] => start_xmit => netpoll_start_xmit => netpoll_send_skb => write_msg => console_flush_all => console_unlock => vprintk_emit => _printk => rio_interrupt => __handle_irq_event_percpu => handle_irq_event => handle_fasteoi_irq => __common_interrupt => common_interrupt => asm_common_interrupt => mwait_idle => default_idle_call => do_idle => cpu_startup_entry => start_secondary => common_startup_64 This issue can occur when the link state changes from off to on (e.g., plugging or unplugging the LAN cable) while transmitting a packet. If the skb has a destructor, a warning message may be printed in this situation. -> consume_skb (dev_kfree_skb()) -> __kfree_skb() -> skb_release_all() -> skb_release_head_state(skb) if (skb->destructor) { DEBUG_NET_WARN_ON_ONCE(in_hardirq()); skb->destructor(skb); } Found by inspection. Signed-off-by: Yeounsu Moon Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-on: D-Link DGE-550T Rev-A3 Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251019075540.55697-1-yyyynoom@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/dlink/dl2k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index 7077d705e471..6e4f17142519 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -733,7 +733,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) u64 tfc_vlan_tag = 0; if (np->link_status == 0) { /* Link Down */ - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } entry = np->cur_tx % TX_RING_SIZE; From d63f0391d6c7b75e1a847e1a26349fa8cad0004d Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 20 Oct 2025 08:54:54 +0200 Subject: [PATCH 0257/1075] net: hibmcge: select FIXED_PHY hibmcge uses fixed_phy_register() et al, but doesn't cater for the case that hibmcge is built-in and fixed_phy is a module. To solve this select FIXED_PHY. Fixes: 1d7cd7a9c69c ("net: hibmcge: support scenario without PHY") Signed-off-by: Heiner Kallweit Reviewed-by: Jijie Shao Link: https://patch.msgid.link/c4fc061f-b6d5-418b-a0dc-6b238cdbedce@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 65302c41bfb1..38875c196cb6 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -148,6 +148,7 @@ config HIBMCGE tristate "Hisilicon BMC Gigabit Ethernet Device Support" depends on PCI && PCI_MSI select PHYLIB + select FIXED_PHY select MOTORCOMM_PHY select REALTEK_PHY help From 43c36a56ccf6d9b07b4b3f4f614756e687dcdc01 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 22 Oct 2025 06:33:42 +0900 Subject: [PATCH 0258/1075] Revert "fs/9p: Refresh metadata in d_revalidate for uncached mode too" This reverts commit 290434474c332a2ba9c8499fe699c7f2e1153280. That commit broke cache=mmap, a mode that doesn't cache metadata, but still has writeback cache. In commit 290434474c33 ("fs/9p: Refresh metadata in d_revalidate for uncached mode too") we considered metadata cache to be enough to not look at the server, but in writeback cache too looking at the server size would make the vfs consider the file has been truncated before the data has been flushed out, making the following repro fail (nothing is ever read back, the resulting file ends up with no data written) ``` #include #include #include #include char buf[4096]; int main(int argc, char *argv[]) { int ret, i; int fdw, fdr; if (argc < 2) return 1; fdw = openat(AT_FDCWD, argv[1], O_RDWR|O_CREAT|O_EXCL|O_CLOEXEC, 0600); if (fdw < 0) { fprintf(stderr, "cannot open fdw\n"); return 1; } write(fdw, buf, sizeof(buf)); fdr = openat(AT_FDCWD, argv[1], O_RDONLY|O_CLOEXEC); if (fdr < 0) { fprintf(stderr, "cannot open fdr\n"); close(fdw); return 1; } for (i = 0; i < 10; i++) { ret = read(fdr, buf, sizeof(buf)); fprintf(stderr, "i: %d, read returns %d\n", i, ret); } close(fdr); close(fdw); return 0; } ``` There is a fix for this particular reproducer but it looks like there are other problems around metadata refresh (e.g. around file rename), so revert this to avoid d_revalidate in uncached mode for now. Reported-by: Song Liu Link: https://lkml.kernel.org/r/CAHzjS_u_SYdt5=2gYO_dxzMKXzGMt-TfdE_ueowg-Hq5tRCAiw@mail.gmail.com Reported-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/CAEf4BzZbCE4tLoDZyUf_aASpgAGFj75QMfSXX4a4dLYixnOiLg@mail.gmail.com/ Fixes: 290434474c33 ("fs/9p: Refresh metadata in d_revalidate for uncached mode too") Signed-off-by: Dominique Martinet --- fs/9p/vfs_dentry.c | 10 ++-------- fs/9p/vfs_inode.c | 8 +------- fs/9p/vfs_inode_dotl.c | 8 +------- 3 files changed, 4 insertions(+), 22 deletions(-) diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f3248a3e5402..c1acbc98465d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -66,7 +66,6 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) struct p9_fid *fid; struct inode *inode; struct v9fs_inode *v9inode; - unsigned int cached; if (flags & LOOKUP_RCU) return -ECHILD; @@ -76,11 +75,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) goto out_valid; v9inode = V9FS_I(inode); - struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); - - cached = v9ses->cache & (CACHE_META | CACHE_LOOSE); - - if (!cached || v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { + if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { int retval; struct v9fs_session_info *v9ses; @@ -114,6 +109,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) p9_debug(P9_DEBUG_VFS, "refresh inode: dentry = %pd (%p), got error %pe\n", dentry, dentry, ERR_PTR(retval)); + if (retval < 0) return retval; } } @@ -150,8 +146,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { }; const struct dentry_operations v9fs_dentry_operations = { - .d_revalidate = v9fs_lookup_revalidate, - .d_weak_revalidate = __v9fs_lookup_revalidate, .d_release = v9fs_dentry_release, .d_unalias_trylock = v9fs_dentry_unalias_trylock, .d_unalias_unlock = v9fs_dentry_unalias_unlock, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 69f378a83775..d0c77ec31b1d 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1339,14 +1339,8 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) * Don't update inode if the file type is different */ umode = p9mode2unixmode(v9ses, st, &rdev); - if (inode_wrong_type(inode, umode)) { - /* - * Do this as a way of letting the caller know the inode should not - * be reused - */ - v9fs_invalidate_inode_attr(inode); + if (inode_wrong_type(inode, umode)) goto out; - } /* * We don't want to refresh inode->i_size, diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 0b404e8484d2..be297e335468 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -897,14 +897,8 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) /* * Don't update inode if the file type is different */ - if (inode_wrong_type(inode, st->st_mode)) { - /* - * Do this as a way of letting the caller know the inode should not - * be reused - */ - v9fs_invalidate_inode_attr(inode); + if (inode_wrong_type(inode, st->st_mode)) goto out; - } /* * We don't want to refresh inode->i_size, From 7959ffbec062c35bda02aa635d21ac45dbfacd80 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Fri, 19 Sep 2025 17:28:53 +0300 Subject: [PATCH 0259/1075] nvmem: rcar-efuse: add missing MODULE_DEVICE_TABLE The nvmem-rcar-efuse driver can be compiled as a module. Add missing MODULE_DEVICE_TABLE so it can be matched by modalias and automatically loaded by udev. Cc: stable@vger.kernel.org Fixes: 1530b923a514 ("nvmem: Add R-Car E-FUSE driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250919142856.2313927-1-cosmin-gabriel.tanislav.xa@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/rcar-efuse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/rcar-efuse.c b/drivers/nvmem/rcar-efuse.c index f24bdb9cb5a7..d9a96a1d59c8 100644 --- a/drivers/nvmem/rcar-efuse.c +++ b/drivers/nvmem/rcar-efuse.c @@ -127,6 +127,7 @@ static const struct of_device_id rcar_fuse_match[] = { { .compatible = "renesas,r8a779h0-otp", .data = &rcar_fuse_v4m }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, rcar_fuse_match); static struct platform_driver rcar_fuse_driver = { .probe = rcar_fuse_probe, From 70ad06df73a9796026b197d84ead751e096618c7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 16 Oct 2025 15:50:40 +0200 Subject: [PATCH 0260/1075] misc: amd-sbi: Clarify that this is a BMC driver Add a sentence to the driver description to clarify that the sbrmi-i2c driver is intended to run on the BMC and not on the managed node. Add platform dependencies accordingly. Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/5c9f7100-0e59-4237-a252-43c3ee4802a2@amd.com Link: https://patch.msgid.link/20251016155040.0e86c102@endymion Signed-off-by: Greg Kroah-Hartman --- drivers/misc/amd-sbi/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/amd-sbi/Kconfig b/drivers/misc/amd-sbi/Kconfig index 4aae0733d0fc..ab594908cb4a 100644 --- a/drivers/misc/amd-sbi/Kconfig +++ b/drivers/misc/amd-sbi/Kconfig @@ -2,9 +2,11 @@ config AMD_SBRMI_I2C tristate "AMD side band RMI support" depends on I2C + depends on ARM || ARM64 || COMPILE_TEST select REGMAP_I2C help Side band RMI over I2C support for AMD out of band management. + This driver is intended to run on the BMC, not the managed node. This driver can also be built as a module. If so, the module will be called sbrmi-i2c. From 410d6c2ad4d1a88efa0acbb9966693725b564933 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 16 Oct 2025 15:59:12 +0300 Subject: [PATCH 0261/1075] mei: me: add wildcat lake P DID Add Wildcat Lake P device id. Cc: stable@vger.kernel.org Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20251016125912.2146136-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bc40b940ae21..a4f75dc36929 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -120,6 +120,8 @@ #define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ +#define MEI_DEV_ID_WCL_P 0x4D70 /* Wildcat Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b108a7c22388..b017ff29dbd1 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -127,6 +127,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WCL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; From fff111bf45cbeeb659324316d68554e35d350092 Mon Sep 17 00:00:00 2001 From: Junhao Xie Date: Fri, 17 Oct 2025 16:39:06 +0800 Subject: [PATCH 0262/1075] misc: fastrpc: Fix dma_buf object leak in fastrpc_map_lookup In fastrpc_map_lookup, dma_buf_get is called to obtain a reference to the dma_buf for comparison purposes. However, this reference is never released when the function returns, leading to a dma_buf memory leak. Fix this by adding dma_buf_put before returning from the function, ensuring that the temporarily acquired reference is properly released regardless of whether a matching map is found. Fixes: 9031626ade38 ("misc: fastrpc: Fix fastrpc_map_lookup operation") Cc: stable@kernel.org Signed-off-by: Junhao Xie Tested-by: Xilin Wu Rule: add Link: https://lore.kernel.org/stable/48B368FB4C7007A7%2B20251017083906.3259343-1-bigfoot%40radxa.com Link: https://patch.msgid.link/48B368FB4C7007A7+20251017083906.3259343-1-bigfoot@radxa.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 621bce7e101c..ee652ef01534 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -381,6 +381,8 @@ static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, } spin_unlock(&fl->lock); + dma_buf_put(buf); + return ret; } From 98718e80af0bb1cd80f4bfe565dd60c57debad51 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 20 Sep 2025 16:17:50 -0700 Subject: [PATCH 0263/1075] mei: late_bind: Fix -Wincompatible-function-pointer-types-strict When building with -Wincompatible-function-pointer-types-strict, a warning designed to catch kernel control flow integrity (kCFI) issues at build time, there is an instance in the new mei late binding code originating from the type parameter of mei_lb_push_payload(): drivers/misc/mei/mei_lb.c:211:18: error: incompatible function pointer types initializing 'int (*)(struct device *, u32, u32, const void *, size_t)' (aka 'int (*)(struct device *, unsigned int, unsigned int, const void *, unsigned long)') with an expression of type 'int (struct device *, enum intel_lb_type, u32, const void *, size_t)' (aka 'int (struct device *, enum intel_lb_type, unsigned int, const void *, unsigned long)') [-Werror,-Wincompatible-function-pointer-types-strict] 211 | .push_payload = mei_lb_push_payload, | ^~~~~~~~~~~~~~~~~~~ While 'unsigned int' and 'enum intel_lb_type' are ABI compatible, hence no regular warning from -Wincompatible-function-pointer-types, the mismatch will trigger a kCFI violation when mei_lb_push_payload() is called indirectly. Update the type parameter of mei_lb_push_payload() to be 'u32' to match the prototype in 'struct intel_lb_component_ops', clearing up the warning and kCFI violation. Fixes: 741eeabb7c78 ("mei: late_bind: add late binding component driver") Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20250920-drm-xe-fix-wifpts-v1-1-c89b5357c7ba@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/mei_lb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/mei/mei_lb.c b/drivers/misc/mei/mei_lb.c index 77686b108d3c..78717ee8ac9a 100644 --- a/drivers/misc/mei/mei_lb.c +++ b/drivers/misc/mei/mei_lb.c @@ -134,8 +134,7 @@ static bool mei_lb_check_response(const struct device *dev, ssize_t bytes, return true; } -static int mei_lb_push_payload(struct device *dev, - enum intel_lb_type type, u32 flags, +static int mei_lb_push_payload(struct device *dev, u32 type, u32 flags, const void *payload, size_t payload_size) { struct mei_cl_device *cldev; From 87b318ba81dda2ee7b603f4f6c55e78ec3e95974 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Wed, 24 Sep 2025 15:56:39 +0530 Subject: [PATCH 0264/1075] comedi: fix divide-by-zero in comedi_buf_munge() The comedi_buf_munge() function performs a modulo operation `async->munge_chan %= async->cmd.chanlist_len` without first checking if chanlist_len is zero. If a user program submits a command with chanlist_len set to zero, this causes a divide-by-zero error when the device processes data in the interrupt handler path. Add a check for zero chanlist_len at the beginning of the function, similar to the existing checks for !map and CMDF_RAWDATA flag. When chanlist_len is zero, update munge_count and return early, indicating the data was handled without munging. This prevents potential kernel panics from malformed user commands. Reported-by: syzbot+f6c3c066162d2c43a66c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f6c3c066162d2c43a66c Cc: stable@vger.kernel.org Signed-off-by: Deepanshu Kartikey Reviewed-by: Ian Abbott Link: https://patch.msgid.link/20250924102639.1256191-1-kartikey406@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/comedi_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/comedi/comedi_buf.c b/drivers/comedi/comedi_buf.c index 002c0e76baff..c7c262a2d8ca 100644 --- a/drivers/comedi/comedi_buf.c +++ b/drivers/comedi/comedi_buf.c @@ -317,7 +317,7 @@ static unsigned int comedi_buf_munge(struct comedi_subdevice *s, unsigned int count = 0; const unsigned int num_sample_bytes = comedi_bytes_per_sample(s); - if (!s->munge || (async->cmd.flags & CMDF_RAWDATA)) { + if (!s->munge || (async->cmd.flags & CMDF_RAWDATA) || async->cmd.chanlist_len == 0) { async->munge_count += num_bytes; return num_bytes; } From 2463ae285e5c162686fb19e822fb6b535e6e728a Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 19 Oct 2025 10:36:59 +0300 Subject: [PATCH 0265/1075] mei: txe: fix initialization order The mei_register() should move before the mei_start() for hook on class device to work. Same change was implemented in mei-me, missed from mei-txe. Fixes: 7704e6be4ed2 ("mei: hook mei_device on class device") Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20251019073659.2646791-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-txe.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index c9eb5c5393e4..06b55a891c6b 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -109,19 +109,19 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto end; } + err = mei_register(dev, &pdev->dev); + if (err) + goto release_irq; + if (mei_start(dev)) { dev_err(&pdev->dev, "init hw failure.\n"); err = -ENODEV; - goto release_irq; + goto deregister; } pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); - err = mei_register(dev, &pdev->dev); - if (err) - goto stop; - pci_set_drvdata(pdev, dev); /* @@ -144,8 +144,8 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -stop: - mei_stop(dev); +deregister: + mei_deregister(dev); release_irq: mei_cancel_work(dev); mei_disable_interrupts(dev); From d90eeb8ecd227c204ab6c34a17b372bd950b7aa2 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 15 Oct 2025 14:26:55 +0000 Subject: [PATCH 0266/1075] binder: remove "invalid inc weak" check There are no scenarios where a weak increment is invalid on binder_node. The only possible case where it could be invalid is if the kernel delivers BR_DECREFS to the process that owns the node, and then increments the weak refcount again, effectively "reviving" a dead node. However, that is not possible: when the BR_DECREFS command is delivered, the kernel removes and frees the binder_node. The fact that you were able to call binder_inc_node_nilocked() implies that the node is not yet destroyed, which implies that BR_DECREFS has not been delivered to userspace, so incrementing the weak refcount is valid. Note that it's currently possible to trigger this condition if the owner calls BINDER_THREAD_EXIT while node->has_weak_ref is true. This causes BC_INCREFS on binder_ref instances to fail when they should not. Cc: stable@vger.kernel.org Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reported-by: Yu-Ting Tseng Signed-off-by: Alice Ryhl Link: https://patch.msgid.link/20251015-binder-weak-inc-v1-1-7914b092c371@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3a09c54bc37b..a3a1b5c33ba3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -851,17 +851,8 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, } else { if (!internal) node->local_weak_refs++; - if (!node->has_weak_ref && list_empty(&node->work.entry)) { - if (target_list == NULL) { - pr_err("invalid inc weak node for %d\n", - node->debug_id); - return -EINVAL; - } - /* - * See comment above - */ + if (!node->has_weak_ref && target_list && list_empty(&node->work.entry)) binder_enqueue_work_ilocked(&node->work, target_list); - } } return 0; } From 4b1270902609ef0d935ed2faa2ea6d122bd148f5 Mon Sep 17 00:00:00 2001 From: Victoria Votokina Date: Fri, 10 Oct 2025 13:52:40 +0300 Subject: [PATCH 0267/1075] most: usb: Fix use-after-free in hdm_disconnect hdm_disconnect() calls most_deregister_interface(), which eventually unregisters the MOST interface device with device_unregister(iface->dev). If that drops the last reference, the device core may call release_mdev() immediately while hdm_disconnect() is still executing. The old code also freed several mdev-owned allocations in hdm_disconnect() and then performed additional put_device() calls. Depending on refcount order, this could lead to use-after-free or double-free when release_mdev() ran (or when unregister paths also performed puts). Fix by moving the frees of mdev-owned allocations into release_mdev(), so they happen exactly once when the device is truly released, and by dropping the extra put_device() calls in hdm_disconnect() that are redundant after device_unregister() and most_deregister_interface(). This addresses the KASAN slab-use-after-free reported by syzbot in hdm_disconnect(). See report and stack traces in the bug link below. Reported-by: syzbot+916742d5d24f6c254761@syzkaller.appspotmail.com Cc: stable Closes: https://syzkaller.appspot.com/bug?extid=916742d5d24f6c254761 Fixes: 97a6f772f36b ("drivers: most: add USB adapter driver") Signed-off-by: Victoria Votokina Link: https://patch.msgid.link/20251010105241.4087114-2-Victoria.Votokina@kaspersky.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/most_usb.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index cf5be9c449a5..3d8163bb7b46 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -929,6 +929,10 @@ static void release_mdev(struct device *dev) { struct most_dev *mdev = to_mdev_from_dev(dev); + kfree(mdev->busy_urbs); + kfree(mdev->cap); + kfree(mdev->conf); + kfree(mdev->ep_address); kfree(mdev); } /** @@ -1121,13 +1125,6 @@ static void hdm_disconnect(struct usb_interface *interface) if (mdev->dci) device_unregister(&mdev->dci->dev); most_deregister_interface(&mdev->iface); - - kfree(mdev->busy_urbs); - kfree(mdev->cap); - kfree(mdev->conf); - kfree(mdev->ep_address); - put_device(&mdev->dci->dev); - put_device(&mdev->dev); } static int hdm_suspend(struct usb_interface *interface, pm_message_t message) From a8cc9e5fcb0e2eef21513a4fec888f5712cb8162 Mon Sep 17 00:00:00 2001 From: Victoria Votokina Date: Fri, 10 Oct 2025 13:52:41 +0300 Subject: [PATCH 0268/1075] most: usb: hdm_probe: Fix calling put_device() before device initialization The early error path in hdm_probe() can jump to err_free_mdev before &mdev->dev has been initialized with device_initialize(). Calling put_device(&mdev->dev) there triggers a device core WARN and ends up invoking kref_put(&kobj->kref, kobject_release) on an uninitialized kobject. In this path the private struct was only kmalloc'ed and the intended release is effectively kfree(mdev) anyway, so free it directly instead of calling put_device() on an uninitialized device. This removes the WARNING and fixes the pre-initialization error path. Fixes: 97a6f772f36b ("drivers: most: add USB adapter driver") Cc: stable Signed-off-by: Victoria Votokina Link: https://patch.msgid.link/20251010105241.4087114-3-Victoria.Votokina@kaspersky.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/most_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index 3d8163bb7b46..10064d7b7249 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -1097,7 +1097,7 @@ hdm_probe(struct usb_interface *interface, const struct usb_device_id *id) err_free_conf: kfree(mdev->conf); err_free_mdev: - put_device(&mdev->dev); + kfree(mdev); return ret; } From 2eead19334516c8e9927c11b448fbe512b1f18a1 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Tue, 23 Sep 2025 23:13:08 +0530 Subject: [PATCH 0269/1075] arch_topology: Fix incorrect error check in topology_parse_cpu_capacity() Fix incorrect use of PTR_ERR_OR_ZERO() in topology_parse_cpu_capacity() which causes the code to proceed with NULL clock pointers. The current logic uses !PTR_ERR_OR_ZERO(cpu_clk) which evaluates to true for both valid pointers and NULL, leading to potential NULL pointer dereference in clk_get_rate(). Per include/linux/err.h documentation, PTR_ERR_OR_ZERO(ptr) returns: "The error code within @ptr if it is an error pointer; 0 otherwise." This means PTR_ERR_OR_ZERO() returns 0 for both valid pointers AND NULL pointers. Therefore !PTR_ERR_OR_ZERO(cpu_clk) evaluates to true (proceed) when cpu_clk is either valid or NULL, causing clk_get_rate(NULL) to be called when of_clk_get() returns NULL. Replace with !IS_ERR_OR_NULL(cpu_clk) which only proceeds for valid pointers, preventing potential NULL pointer dereference in clk_get_rate(). Cc: stable Signed-off-by: Kaushlendra Kumar Reviewed-by: Sudeep Holla Fixes: b8fe128dad8f ("arch_topology: Adjust initial CPU capacities with current freq") Link: https://patch.msgid.link/20250923174308.1771906-1-kaushlendra.kumar@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1037169abb45..e1eff05bea4a 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -292,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) * frequency (by keeping the initial capacity_freq_ref value). */ cpu_clk = of_clk_get(cpu_node, 0); - if (!PTR_ERR_OR_ZERO(cpu_clk)) { + if (!IS_ERR_OR_NULL(cpu_clk)) { per_cpu(capacity_freq_ref, cpu) = clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); From 00aaae60faf554c27c95e93d47f200a93ff266ef Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 14 Oct 2025 18:53:53 +0300 Subject: [PATCH 0270/1075] gpio: regmap: add the .fixed_direction_output configuration parameter There are GPIO controllers such as the one present in the LX2160ARDB QIXIS FPGA which have fixed-direction input and output GPIO lines mixed together in a single register. This cannot be modeled using the gpio-regmap as-is since there is no way to present the true direction of a GPIO line. In order to make this use case possible, add a new configuration parameter - fixed_direction_output - into the gpio_regmap_config structure. This will enable user drivers to provide a bitmap that represents the fixed direction of the GPIO lines. Signed-off-by: Ioana Ciornei Acked-by: Bartosz Golaszewski Reviewed-by: Michael Walle Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-regmap.c | 26 ++++++++++++++++++++++++-- include/linux/gpio/regmap.h | 5 +++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c index ab9e4077fa60..f4267af00027 100644 --- a/drivers/gpio/gpio-regmap.c +++ b/drivers/gpio/gpio-regmap.c @@ -31,6 +31,7 @@ struct gpio_regmap { unsigned int reg_clr_base; unsigned int reg_dir_in_base; unsigned int reg_dir_out_base; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ int regmap_irq_line; @@ -134,6 +135,13 @@ static int gpio_regmap_get_direction(struct gpio_chip *chip, unsigned int base, val, reg, mask; int invert, ret; + if (gpio->fixed_direction_output) { + if (test_bit(offset, gpio->fixed_direction_output)) + return GPIO_LINE_DIRECTION_OUT; + else + return GPIO_LINE_DIRECTION_IN; + } + if (gpio->reg_dat_base && !gpio->reg_set_base) return GPIO_LINE_DIRECTION_IN; if (gpio->reg_set_base && !gpio->reg_dat_base) @@ -284,6 +292,17 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config goto err_free_gpio; } + if (config->fixed_direction_output) { + gpio->fixed_direction_output = bitmap_alloc(chip->ngpio, + GFP_KERNEL); + if (!gpio->fixed_direction_output) { + ret = -ENOMEM; + goto err_free_gpio; + } + bitmap_copy(gpio->fixed_direction_output, + config->fixed_direction_output, chip->ngpio); + } + /* if not set, assume there is only one register */ gpio->ngpio_per_reg = config->ngpio_per_reg; if (!gpio->ngpio_per_reg) @@ -300,7 +319,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config ret = gpiochip_add_data(chip, gpio); if (ret < 0) - goto err_free_gpio; + goto err_free_bitmap; #ifdef CONFIG_REGMAP_IRQ if (config->regmap_irq_chip) { @@ -309,7 +328,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config config->regmap_irq_line, config->regmap_irq_flags, 0, config->regmap_irq_chip, &gpio->irq_chip_data); if (ret) - goto err_free_gpio; + goto err_free_bitmap; irq_domain = regmap_irq_get_domain(gpio->irq_chip_data); } else @@ -326,6 +345,8 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config err_remove_gpiochip: gpiochip_remove(chip); +err_free_bitmap: + bitmap_free(gpio->fixed_direction_output); err_free_gpio: kfree(gpio); return ERR_PTR(ret); @@ -344,6 +365,7 @@ void gpio_regmap_unregister(struct gpio_regmap *gpio) #endif gpiochip_remove(&gpio->gpio_chip); + bitmap_free(gpio->fixed_direction_output); kfree(gpio); } EXPORT_SYMBOL_GPL(gpio_regmap_unregister); diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 622a2939ebe0..87983a5f3681 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -38,6 +38,10 @@ struct regmap; * offset to a register/bitmask pair. If not * given the default gpio_regmap_simple_xlate() * is used. + * @fixed_direction_output: + * (Optional) Bitmap representing the fixed direction of + * the GPIO lines. Useful when there are GPIO lines with a + * fixed direction mixed together in the same register. * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). @@ -85,6 +89,7 @@ struct gpio_regmap_config { int reg_stride; int ngpio_per_reg; struct irq_domain *irq_domain; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ struct regmap_irq_chip *regmap_irq_chip; From 2ba5772e530f73eb847fb96ce6c4017894869552 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:46 +0900 Subject: [PATCH 0271/1075] gpio: idio-16: Define fixed direction of the GPIO lines The direction of the IDIO-16 GPIO lines is fixed with the first 16 lines as output and the remaining 16 lines as input. Set the gpio_config fixed_direction_output member to represent the fixed direction of the GPIO lines. Fixes: db02247827ef ("gpio: idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Michael Walle Cc: stable@vger.kernel.org # ae495810cffe: gpio: regmap: add the .fixed_direction_output configuration parameter Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-3-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-idio-16.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c index 0103be977c66..4fbae6f6a497 100644 --- a/drivers/gpio/gpio-idio-16.c +++ b/drivers/gpio/gpio-idio-16.c @@ -6,6 +6,7 @@ #define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" +#include #include #include #include @@ -107,6 +108,7 @@ int devm_idio_16_regmap_register(struct device *const dev, struct idio_16_data *data; struct regmap_irq_chip *chip; struct regmap_irq_chip_data *chip_data; + DECLARE_BITMAP(fixed_direction_output, IDIO_16_NGPIO); if (!config->parent) return -EINVAL; @@ -164,6 +166,9 @@ int devm_idio_16_regmap_register(struct device *const dev, gpio_config.irq_domain = regmap_irq_get_domain(chip_data); gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate; + bitmap_from_u64(fixed_direction_output, GENMASK_U64(15, 0)); + gpio_config.fixed_direction_output = fixed_direction_output; + return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config)); } EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register); From 0fd7e7a1ad7c5fb8801a9c1661fc4e1ae4d9b655 Mon Sep 17 00:00:00 2001 From: Leo Martins Date: Mon, 20 Oct 2025 16:16:15 -0700 Subject: [PATCH 0272/1075] btrfs: fix delayed_node ref_tracker use after free Move the print before releasing the delayed node. In my initial testing there was a bug that was causing delayed_nodes to not get freed which is why I put the print after the release. This obviously neglects the case where the delayed node is properly freed. Add condition to make sure we only print if we have more than one reference to the delayed_node to prevent printing when we only have the reference taken in btrfs_kill_all_delayed_nodes(). Fixes: b767a28d6154 ("btrfs: print leaked references in kill_all_delayed_nodes()") Tested-by: Christoph Hellwig Signed-off-by: Leo Martins Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/delayed-inode.c | 2 +- fs/btrfs/delayed-inode.h | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 41e37f7f67cc..3df7b9d7fbe8 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -2110,9 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) for (int i = 0; i < count; i++) { __btrfs_kill_delayed_node(delayed_nodes[i]); + btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i], &delayed_node_trackers[i]); - btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]); } } } diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 0d949edc0caf..b09d4ec8c77d 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -219,6 +219,13 @@ static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER)) return; + /* + * Only print if there are leaked references. The caller is + * holding one reference, so if refs == 1 there is no leak. + */ + if (refcount_read(&node->refs) == 1) + return; + ref_tracker_dir_print(&node->ref_dir.dir, BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT); } From ada7d45b568abe4f1fd9c53d66e05fbea300674b Mon Sep 17 00:00:00 2001 From: Amit Dhingra Date: Tue, 21 Oct 2025 07:07:20 -0500 Subject: [PATCH 0273/1075] btrfs: ref-verify: fix IS_ERR() vs NULL check in btrfs_build_ref_tree() btrfs_extent_root()/btrfs_global_root() does not return error pointers, it returns NULL on error. Reported-by: Dan Carpenter Link: https://lore.kernel.org/all/aNJfvxj0anEnk9Dm@stanley.mountain/ Fixes : ed4e6b5d644c ("btrfs: ref-verify: handle damaged extent root tree") CC: stable@vger.kernel.org # 6.17+ Signed-off-by: Amit Dhingra Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ref-verify.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index de4cb0f3fbd0..e9224145d754 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -982,7 +982,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) extent_root = btrfs_extent_root(fs_info, 0); /* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */ - if (IS_ERR(extent_root)) { + if (!extent_root) { btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling"); btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); return 0; From 37b9dd0d114a0e38c502695e30f55a74fb0c37d0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 22 Oct 2025 00:25:45 +0200 Subject: [PATCH 0274/1075] usb: raw-gadget: do not limit transfer length Drop the check on the maximum transfer length in Raw Gadget for both control and non-control transfers. Limiting the transfer length causes a problem with emulating USB devices whose full configuration descriptor exceeds PAGE_SIZE in length. Overall, there does not appear to be any reason to enforce any kind of transfer length limit on the Raw Gadget side for either control or non-control transfers, so let's just drop the related check. Cc: stable Fixes: f2c2e717642c ("usb: gadget: add raw-gadget interface") Signed-off-by: Andrey Konovalov Link: https://patch.msgid.link/a6024e8eab679043e9b8a5defdb41c4bda62f02b.1761085528.git.andreyknvl@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 20165e1582d9..b71680c58de6 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -667,8 +667,6 @@ static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, return ERR_PTR(-EINVAL); if (!usb_raw_io_flags_valid(io->flags)) return ERR_PTR(-EINVAL); - if (io->length > PAGE_SIZE) - return ERR_PTR(-EINVAL); if (get_from_user) data = memdup_user(ptr + sizeof(*io), io->length); else { From dfc2cf4dcaa03601cd4ca0f7def88b2630fca6ab Mon Sep 17 00:00:00 2001 From: Tim Guttzeit Date: Mon, 20 Oct 2025 15:39:04 +0200 Subject: [PATCH 0275/1075] usb/core/quirks: Add Huawei ME906S to wakeup quirk The list of Huawei LTE modules needing the quirk fixing spurious wakeups was missing the IDs of the Huawei ME906S module, therefore suspend did not work. Cc: stable Signed-off-by: Tim Guttzeit Signed-off-by: Werner Sembach Link: https://patch.msgid.link/20251020134304.35079-1-wse@tuxedocomputers.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f5bc53875330..47f589c4104a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -467,6 +467,8 @@ static const struct usb_device_id usb_quirk_list[] = { /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, + { USB_DEVICE(0x12d1, 0x15c1), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, { USB_DEVICE(0x12d1, 0x15c3), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, From 2d8713f807a49b8a67c221670e50ae04967e915d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 13 Oct 2025 11:43:40 +0200 Subject: [PATCH 0276/1075] tcpm: switch check for role_sw device with fw_node When there is no port entry in the tcpci entry itself, the driver will trigger an error message "OF: graph: no port node found in /...../typec" . It is documented that the dts node should contain an connector entry with ports and several port pointing to devices with usb-role-switch property set. Only when those connector entry is missing, it should check for port entries in the main node. We switch the search order for looking after ports, which will avoid the failure message while there are explicit connector entries. Fixes: d56de8c9a17d ("usb: typec: tcpm: try to get role switch from tcpc fwnode") Cc: stable Signed-off-by: Michael Grzeschik Reviewed-by: Heikki Krogerus Reviewed-by: Badhri Jagan Sridharan Link: https://patch.msgid.link/20251013-b4-ml-topic-tcpm-v2-1-63c9b2ab8a0b@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b2a568a5bc9b..cc78770509db 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -7876,9 +7876,9 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) port->partner_desc.identity = &port->partner_ident; - port->role_sw = usb_role_switch_get(port->dev); + port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); if (!port->role_sw) - port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); + port->role_sw = usb_role_switch_get(port->dev); if (IS_ERR(port->role_sw)) { err = PTR_ERR(port->role_sw); goto out_destroy_wq; From bd721ec7dedcc24ced51559e42a39140b59dfd08 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:29:56 -0700 Subject: [PATCH 0277/1075] xfs: don't set bt_nr_sectors to a negative number xfs_daddr_t is a signed type, which means that xfs_buf_map_verify is using a signed comparison. This causes problems if bt_nr_sectors is never overridden (e.g. in the case of an xfbtree for rmap btree repairs) because even daddr 0 can't pass the verifier test in that case. Define an explicit max constant and set the initial bt_nr_sectors to a positive value. Found by xfs/422. Cc: stable@vger.kernel.org # v6.18-rc1 Fixes: 42852fe57c6d2a ("xfs: track the number of blocks in each buftarg") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_buf.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 773d959965dc..47edf3041631 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1751,7 +1751,7 @@ xfs_init_buftarg( const char *descr) { /* The maximum size of the buftarg is only known once the sb is read. */ - btp->bt_nr_sectors = (xfs_daddr_t)-1; + btp->bt_nr_sectors = XFS_BUF_DADDR_MAX; /* Set up device logical sector size mask */ btp->bt_logical_sectorsize = logical_sectorsize; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8fa7bdf59c91..e25cd2a160f3 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -22,6 +22,7 @@ extern struct kmem_cache *xfs_buf_cache; */ struct xfs_buf; +#define XFS_BUF_DADDR_MAX ((xfs_daddr_t) S64_MAX) #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) #define XBF_READ (1u << 0) /* buffer intended for reading from device */ From 630785bfbe12c3ee3ebccd8b530a98d632b7e39d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:12 -0700 Subject: [PATCH 0278/1075] xfs: always warn about deprecated mount options The deprecation of the 'attr2' mount option in 6.18 wasn't entirely successful because nobody noticed that the kernel never printed a warning about attr2 being set in fstab if the only xfs filesystem is the root fs; the initramfs mounts the root fs with no mount options; and the init scripts only conveyed the fstab options by remounting the root fs. Fix this by making it complain all the time. Cc: stable@vger.kernel.org # v5.13 Fixes: 92cf7d36384b99 ("xfs: Skip repetitive warnings about mount options") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9d51186b24dd..c53f2edf92e7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1379,16 +1379,25 @@ suffix_kstrtoull( static inline void xfs_fs_warn_deprecated( struct fs_context *fc, - struct fs_parameter *param, - uint64_t flag, - bool value) + struct fs_parameter *param) { - /* Don't print the warning if reconfiguring and current mount point - * already had the flag set + /* + * Always warn about someone passing in a deprecated mount option. + * Previously we wouldn't print the warning if we were reconfiguring + * and current mount point already had the flag set, but that was not + * the right thing to do. + * + * Many distributions mount the root filesystem with no options in the + * initramfs and rely on mount -a to remount the root fs with the + * options in fstab. However, the old behavior meant that there would + * never be a warning about deprecated mount options for the root fs in + * /etc/fstab. On a single-fs system, that means no warning at all. + * + * Compounding this problem are distribution scripts that copy + * /proc/mounts to fstab, which means that we can't remove mount + * options unless we're 100% sure they have only ever been advertised + * in /proc/mounts in response to explicitly provided mount options. */ - if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && - !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) - return; xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } From 3e7ec343f066cb3b6916239680ab6ad44537b453 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:27 -0700 Subject: [PATCH 0279/1075] xfs: loudly complain about defunct mount options Apparently we can never deprecate mount options in this project, because it will invariably turn out that some foolish userspace depends on some behavior and break. From Oleksandr Natalenko: In v6.18, the attr2 XFS mount option is removed. This may silently break system boot if the attr2 option is still present in /etc/fstab for rootfs. Consider Arch Linux that is being set up from scratch with / being formatted as XFS. The genfstab command that is used to generate /etc/fstab produces something like this by default: /dev/sda2 on / type xfs (rw,relatime,attr2,discard,inode64,logbufs=8,logbsize=32k,noquota) Once the system is set up and rebooted, there's no deprecation warning seen in the kernel log: # cat /proc/cmdline root=UUID=77b42de2-397e-47ee-a1ef-4dfd430e47e9 rootflags=discard rd.luks.options=discard quiet # dmesg | grep -i xfs [ 2.409818] SGI XFS with ACLs, security attributes, realtime, scrub, repair, quota, no debug enabled [ 2.415341] XFS (sda2): Mounting V5 Filesystem 77b42de2-397e-47ee-a1ef-4dfd430e47e9 [ 2.442546] XFS (sda2): Ending clean mount Although as per the deprecation intention, it should be there. Vlastimil (in Cc) suggests this is because xfs_fs_warn_deprecated() doesn't produce any warning by design if the XFS FS is set to be rootfs and gets remounted read-write during boot. This imposes two problems: 1) a user doesn't see the deprecation warning; and 2) with v6.18 kernel, the read-write remount fails because of unknown attr2 option rendering system unusable: systemd[1]: Switching root. systemd-remount-fs[225]: /usr/bin/mount for / exited with exit status 32. # mount -o rw / mount: /: fsconfig() failed: xfs: Unknown parameter 'attr2'. Thorsten (in Cc) suggested reporting this as a user-visible regression. From my PoV, although the deprecation is in place for 5 years already, it may not be visible enough as the warning is not emitted for rootfs. Considering the amount of systems set up with XFS on /, this may impose a mass problem for users. Vlastimil suggested making attr2 option a complete noop instead of removing it. IOWs, the initrd mounts the root fs with (I assume) no mount options, and mount -a remounts with whatever options are in fstab. However, XFS doesn't complain about deprecated mount options during a remount, so technically speaking we were not warning all users in all combinations that they were heading for a cliff. Gotcha!! Now, how did 'attr2' get slurped up on so many systems? The old code would put that in /proc/mounts if the filesystem happened to be in attr2 mode, even if user hadn't mounted with any such option. IOWs, this is because someone thought it would be a good idea to advertise system state via /proc/mounts. The easy way to fix this is to reintroduce the four mount options but map them to a no-op option that ignores them, and hope that nobody's depending on attr2 to appear in /proc/mounts. (Hint: use the fsgeometry ioctl). But we've learned our lesson, so complain as LOUDLY as possible about the deprecation. Lessons learned: 1. Don't expose system state via /proc/mounts; the only strings that ought to be there are options *explicitly* provided by the user. 2. Never tidy, it's not worth the stress and irritation. Reported-by: Vlastimil Babka Reported-by: Oleksandr Natalenko Cc: stable@vger.kernel.org # v6.18-rc1 Fixes: b9a176e54162f8 ("xfs: remove deprecated mount options") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c53f2edf92e7..1067ebb3b001 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -102,7 +102,7 @@ static const struct constant_table dax_param_enums[] = { * Table driven mount option parser. */ enum { - Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, + Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, @@ -114,7 +114,21 @@ enum { Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, }; +#define fsparam_dead(NAME) \ + __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) + static const struct fs_parameter_spec xfs_fs_parameters[] = { + /* + * These mount options were supposed to be deprecated in September 2025 + * but the deprecation warning was buggy, so not all users were + * notified. The deprecation is now obnoxiously loud and postponed to + * September 2030. + */ + fsparam_dead("attr2"), + fsparam_dead("noattr2"), + fsparam_dead("ikeep"), + fsparam_dead("noikeep"), + fsparam_u32("logbufs", Opt_logbufs), fsparam_string("logbsize", Opt_logbsize), fsparam_string("logdev", Opt_logdev), @@ -1423,6 +1437,9 @@ xfs_fs_parse_param( return opt; switch (opt) { + case Op_deprecated: + xfs_fs_warn_deprecated(fc, param); + return 0; case Opt_logbufs: parsing_mp->m_logbufs = result.uint_32; return 0; @@ -1543,7 +1560,6 @@ xfs_fs_parse_param( xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif - /* Following mount options will be removed in September 2025 */ case Opt_max_open_zones: parsing_mp->m_max_open_zones = result.uint_32; return 0; From f477af0cfa0487eddec66ffe10fd9df628ba6f52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:43 -0700 Subject: [PATCH 0280/1075] xfs: fix locking in xchk_nlinks_collect_dir On a filesystem with parent pointers, xchk_nlinks_collect_dir walks both the directory entries (data fork) and the parent pointers (attr fork) to determine the correct link count. Unfortunately I forgot to update the lock mode logic to handle the case of a directory whose attr fork is in btree format and has not yet been loaded *and* whose data fork doesn't need loading. This leads to a bunch of assertions from xfs/286 in xfs_iread_extents because we only took ILOCK_SHARED, not ILOCK_EXCL. You'd need the rare happenstance of a directory with a large number of non-pptr extended attributes set and enough memory pressure to cause the directory to be evicted and partially reloaded from disk. I /think/ this only started in 6.18-rc1 because I've started seeing OOM errors with the maple tree slab using 70% of memory, and this didn't happen in 6.17. Yay dynamic systems! Cc: stable@vger.kernel.org # v6.10 Fixes: 77ede5f44b0d86 ("xfs: walk directory parent pointers to determine backref count") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/nlinks.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 26721fab5cab..091c79e432e5 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -376,6 +376,36 @@ xchk_nlinks_collect_pptr( return error; } +static uint +xchk_nlinks_ilock_dir( + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + /* + * We're going to scan the directory entries, so we must be ready to + * pull the data fork mappings into memory if they aren't already. + */ + if (xfs_need_iread_extents(&ip->i_df)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * We're going to scan the parent pointers, so we must be ready to + * pull the attr fork mappings into memory if they aren't already. + */ + if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) && + xfs_need_iread_extents(&ip->i_af)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * Take the IOLOCK so that other threads cannot start a directory + * update while we're scanning. + */ + lock_mode |= XFS_IOLOCK_SHARED; + xfs_ilock(ip, lock_mode); + return lock_mode; +} + /* Walk a directory to bump the observed link counts of the children. */ STATIC int xchk_nlinks_collect_dir( @@ -394,8 +424,7 @@ xchk_nlinks_collect_dir( return 0; /* Prevent anyone from changing this directory while we walk it. */ - xfs_ilock(dp, XFS_IOLOCK_SHARED); - lock_mode = xfs_ilock_data_map_shared(dp); + lock_mode = xchk_nlinks_ilock_dir(dp); /* * The dotdot entry of an unlinked directory still points to the last @@ -452,7 +481,6 @@ xchk_nlinks_collect_dir( xchk_iscan_abort(&xnc->collect_iscan); out_unlock: xfs_iunlock(dp, lock_mode); - xfs_iunlock(dp, XFS_IOLOCK_SHARED); return error; } From db82b8dbf5f06d7b1abec4e1326ed8c02fa16897 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 20 Oct 2025 17:03:28 +0200 Subject: [PATCH 0281/1075] PM: runtime: Fix conditional guard definitions Since pm_runtime_get_active() returns 0 on success, all of the DEFINE_GUARD_COND() macros in pm_runtime.h need the "_RET == 0" condition at the end of the argument list or they would not work correctly. Fixes: 9a0abc39450a ("PM: runtime: Add auto-cleanup macros for "resume and get" operations") Reported-by: kernel test robot Link: https://lore.kernel.org/linux-pm/202510191529.BCyjKlLQ-lkp@intel.com/ Signed-off-by: Rafael J. Wysocki Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Tested-by: Farhan Ali Link: https://patch.msgid.link/5943878.DvuYhMxLoT@rafael.j.wysocki --- include/linux/pm_runtime.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index a3f44f6c2da1..0b436e15f4cd 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -629,13 +629,13 @@ DEFINE_GUARD(pm_runtime_active_auto, struct device *, * device. */ DEFINE_GUARD_COND(pm_runtime_active, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try, - pm_runtime_get_active(_T, RPM_TRANSPARENT)) + pm_runtime_get_active(_T, RPM_TRANSPARENT), _RET == 0) DEFINE_GUARD_COND(pm_runtime_active_auto, _try_enabled, - pm_runtime_resume_and_get(_T)) + pm_runtime_resume_and_get(_T), _RET == 0) /** * pm_runtime_put_sync - Drop device usage counter and run "idle check" if 0. From a28352cf2d2f8380e7aca8cb61682396dca7a991 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Mon, 20 Oct 2025 09:49:41 +0800 Subject: [PATCH 0282/1075] mmc: sdhci-of-dwcmshc: Change DLL_STRBIN_TAPNUM_DEFAULT to 0x4 strbin signal delay under 0x8 configuration is not stable after massive test. The recommandation of it should be 0x4. Signed-off-by: Shawn Lin Tested-by: Alexey Charkov Tested-by: Hugh Cole-Baker Fixes: 08f3dff799d4 ("mmc: sdhci-of-dwcmshc: add rockchip platform support") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-dwcmshc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index eebd45389956..5b61401a7f3d 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -94,7 +94,7 @@ #define DLL_TXCLK_TAPNUM_DEFAULT 0x10 #define DLL_TXCLK_TAPNUM_90_DEGREES 0xA #define DLL_TXCLK_TAPNUM_FROM_SW BIT(24) -#define DLL_STRBIN_TAPNUM_DEFAULT 0x8 +#define DLL_STRBIN_TAPNUM_DEFAULT 0x4 #define DLL_STRBIN_TAPNUM_FROM_SW BIT(24) #define DLL_STRBIN_DELAY_NUM_SEL BIT(26) #define DLL_STRBIN_DELAY_NUM_OFFSET 16 From e4185bed738da755b191aa3f2e16e8b48450e1b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 30 Sep 2025 15:32:34 +0300 Subject: [PATCH 0283/1075] mtdchar: fix integer overflow in read/write ioctls The "req.start" and "req.len" variables are u64 values that come from the user at the start of the function. We mask away the high 32 bits of "req.len" so that's capped at U32_MAX but the "req.start" variable can go up to U64_MAX which means that the addition can still integer overflow. Use check_add_overflow() to fix this bug. Fixes: 095bb6e44eb1 ("mtdchar: add MEMREAD ioctl") Fixes: 6420ac0af95d ("mtdchar: prevent unbounded allocation in MEMWRITE ioctl") Cc: stable@vger.kernel.org Signed-off-by: Dan Carpenter Signed-off-by: Miquel Raynal --- drivers/mtd/mtdchar.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 8dc4f5c493fc..335c702633ff 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -599,6 +599,7 @@ mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp) uint8_t *datbuf = NULL, *oobbuf = NULL; size_t datbuf_len, oobbuf_len; int ret = 0; + u64 end; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; @@ -618,7 +619,7 @@ mtdchar_write_ioctl(struct mtd_info *mtd, struct mtd_write_req __user *argp) req.len &= 0xffffffff; req.ooblen &= 0xffffffff; - if (req.start + req.len > mtd->size) + if (check_add_overflow(req.start, req.len, &end) || end > mtd->size) return -EINVAL; datbuf_len = min_t(size_t, req.len, mtd->erasesize); @@ -698,6 +699,7 @@ mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp) size_t datbuf_len, oobbuf_len; size_t orig_len, orig_ooblen; int ret = 0; + u64 end; if (copy_from_user(&req, argp, sizeof(req))) return -EFAULT; @@ -724,7 +726,7 @@ mtdchar_read_ioctl(struct mtd_info *mtd, struct mtd_read_req __user *argp) req.len &= 0xffffffff; req.ooblen &= 0xffffffff; - if (req.start + req.len > mtd->size) { + if (check_add_overflow(req.start, req.len, &end) || end > mtd->size) { ret = -EINVAL; goto out; } From 9225f02ff201837e1443076f37a3c008140d1835 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Oct 2025 12:30:10 +0300 Subject: [PATCH 0284/1075] mtd: nand: realtek-ecc: Fix a IS_ERR() vs NULL bug in probe The dma_alloc_noncoherent() function doesn't return error pointers, it returns NULL on error. Fix the error checking to match. Fixes: 3148d0e5b1c5 ("mtd: nand: realtek-ecc: Add Realtek external ECC engine support") Signed-off-by: Dan Carpenter Reviewed-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal --- drivers/mtd/nand/ecc-realtek.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/ecc-realtek.c b/drivers/mtd/nand/ecc-realtek.c index 7d718934c909..7c275f1eb4a7 100644 --- a/drivers/mtd/nand/ecc-realtek.c +++ b/drivers/mtd/nand/ecc-realtek.c @@ -418,8 +418,8 @@ static int rtl_ecc_probe(struct platform_device *pdev) rtlc->buf = dma_alloc_noncoherent(dev, RTL_ECC_DMA_SIZE, &rtlc->buf_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); - if (IS_ERR(rtlc->buf)) - return PTR_ERR(rtlc->buf); + if (!rtlc->buf) + return -ENOMEM; rtlc->dev = dev; rtlc->engine.dev = dev; From 0d9c80aa572182d4b1464826cd77aa8973213216 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Oct 2025 11:47:15 +0200 Subject: [PATCH 0285/1075] mtd: nand: MTD_NAND_ECC_REALTEK should depend on HAS_DMA If CONFIG_NO_DMA=y: ERROR: modpost: "dma_free_pages" [drivers/mtd/nand/ecc-realtek.ko] undefined! ERROR: modpost: "dma_alloc_pages" [drivers/mtd/nand/ecc-realtek.ko] undefined! The driver cannot function without DMA, hence fix this by adding a dependency on HAS_DMA. Fixes: 3148d0e5b1c5733d ("mtd: nand: realtek-ecc: Add Realtek external ECC engine support") Signed-off-by: Geert Uytterhoeven Signed-off-by: Miquel Raynal --- drivers/mtd/nand/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 4a17271076bc..1e57c8de8578 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -63,7 +63,7 @@ config MTD_NAND_ECC_MEDIATEK config MTD_NAND_ECC_REALTEK tristate "Realtek RTL93xx hardware ECC engine" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA depends on MACH_REALTEK_RTL || COMPILE_TEST select MTD_NAND_ECC help From 9631350885929819d4e46c6521df35960b472ef3 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Mon, 20 Oct 2025 20:53:33 +0800 Subject: [PATCH 0286/1075] mtd: rawnand: realtek: Make rtl_ecc_engine_ops const The rtl_ecc_engine_ops structure is only used to provide a set of callback functions and is never modified after initialization. Mark it as const so it can be placed in the read-only section, which improves safety and allows better compiler optimization. Signed-off-by: Li Qiang Signed-off-by: Miquel Raynal --- drivers/mtd/nand/ecc-realtek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/ecc-realtek.c b/drivers/mtd/nand/ecc-realtek.c index 7c275f1eb4a7..0046da37ea3e 100644 --- a/drivers/mtd/nand/ecc-realtek.c +++ b/drivers/mtd/nand/ecc-realtek.c @@ -380,7 +380,7 @@ static void rtl_ecc_cleanup_ctx(struct nand_device *nand) nand_ecc_cleanup_req_tweaking(&ctx->req_ctx); } -static struct nand_ecc_engine_ops rtl_ecc_engine_ops = { +static const struct nand_ecc_engine_ops rtl_ecc_engine_ops = { .init_ctx = rtl_ecc_init_ctx, .cleanup_ctx = rtl_ecc_cleanup_ctx, .prepare_io_req = rtl_ecc_prepare_io_req, From ef8fef45c74b5a0059488fda2df65fa133f7d7d0 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 23 Sep 2025 18:47:06 +0300 Subject: [PATCH 0287/1075] tty: serial: sh-sci: fix RSCI FIFO overrun handling The receive error handling code is shared between RSCI and all other SCIF port types, but the RSCI overrun_reg is specified as a memory offset, while for other SCIF types it is an enum value used to index into the sci_port_params->regs array, as mentioned above the sci_serial_in() function. For RSCI, the overrun_reg is CSR (0x48), causing the sci_getreg() call inside the sci_handle_fifo_overrun() function to index outside the bounds of the regs array, which currently has a size of 20, as specified by SCI_NR_REGS. Because of this, we end up accessing memory outside of RSCI's rsci_port_params structure, which, when interpreted as a plat_sci_reg, happens to have a non-zero size, causing the following WARN when sci_serial_in() is called, as the accidental size does not match the supported register sizes. The existence of the overrun_reg needs to be checked because SCIx_SH3_SCIF_REGTYPE has overrun_reg set to SCLSR, but SCLSR is not present in the regs array. Avoid calling sci_getreg() for port types which don't use standard register handling. Use the ops->read_reg() and ops->write_reg() functions to properly read and write registers for RSCI, and change the type of the status variable to accommodate the 32-bit CSR register. sci_getreg() and sci_serial_in() are also called with overrun_reg in the sci_mpxed_interrupt() interrupt handler, but that code path is not used for RSCI, as it does not have a muxed interrupt. ------------[ cut here ]------------ Invalid register access WARNING: CPU: 0 PID: 0 at drivers/tty/serial/sh-sci.c:522 sci_serial_in+0x38/0xac Modules linked in: renesas_usbhs at24 rzt2h_adc industrialio_adc sha256 cfg80211 bluetooth ecdh_generic ecc rfkill fuse drm backlight ipv6 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.17.0-rc1+ #30 PREEMPT Hardware name: Renesas RZ/T2H EVK Board based on r9a09g077m44 (DT) pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : sci_serial_in+0x38/0xac lr : sci_serial_in+0x38/0xac sp : ffff800080003e80 x29: ffff800080003e80 x28: ffff800082195b80 x27: 000000000000000d x26: ffff8000821956d0 x25: 0000000000000000 x24: ffff800082195b80 x23: ffff000180e0d800 x22: 0000000000000010 x21: 0000000000000000 x20: 0000000000000010 x19: ffff000180e72000 x18: 000000000000000a x17: ffff8002bcee7000 x16: ffff800080000000 x15: 0720072007200720 x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 x11: 0000000000000058 x10: 0000000000000018 x9 : ffff8000821a6a48 x8 : 0000000000057fa8 x7 : 0000000000000406 x6 : ffff8000821fea48 x5 : ffff00033ef88408 x4 : ffff8002bcee7000 x3 : ffff800082195b80 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff800082195b80 Call trace: sci_serial_in+0x38/0xac (P) sci_handle_fifo_overrun.isra.0+0x70/0x134 sci_er_interrupt+0x50/0x39c __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x44/0xb0 handle_fasteoi_irq+0xf4/0x1a0 handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x140 call_on_irq_stack+0x30/0x48 do_interrupt_handler+0x80/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 default_idle_call+0x28/0x58 (P) do_idle+0x1f8/0x250 cpu_startup_entry+0x34/0x3c rest_init+0xd8/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ---[ end trace 0000000000000000 ]--- Cc: stable Fixes: 0666e3fe95ab ("serial: sh-sci: Add support for RZ/T2H SCI") Signed-off-by: Cosmin Tanislav Link: https://patch.msgid.link/20250923154707.1089900-1-cosmin-gabriel.tanislav.xa@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 538b2f991609..62bb62b82cbe 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1014,16 +1014,18 @@ static int sci_handle_fifo_overrun(struct uart_port *port) struct sci_port *s = to_sci_port(port); const struct plat_sci_reg *reg; int copied = 0; - u16 status; + u32 status; - reg = sci_getreg(port, s->params->overrun_reg); - if (!reg->size) - return 0; + if (s->type != SCI_PORT_RSCI) { + reg = sci_getreg(port, s->params->overrun_reg); + if (!reg->size) + return 0; + } - status = sci_serial_in(port, s->params->overrun_reg); + status = s->ops->read_reg(port, s->params->overrun_reg); if (status & s->params->overrun_mask) { status &= ~s->params->overrun_mask; - sci_serial_out(port, s->params->overrun_reg, status); + s->ops->write_reg(port, s->params->overrun_reg, status); port->icount.overrun++; From e7cbce761fe3fcbcb49bcf30d4f8ca5e1a9ee2a0 Mon Sep 17 00:00:00 2001 From: Florian Eckert Date: Wed, 24 Sep 2025 15:41:15 +0200 Subject: [PATCH 0288/1075] serial: 8250_exar: add support for Advantech 2 port card with Device ID 0x0018 The Advantech 2-port serial card with PCI vendor=0x13fe and device=0x0018 has a 'XR17V35X' chip installed on the circuit board. Therefore, this driver can be used instead of theu outdated out-of-tree driver from the manufacturer. Signed-off-by: Florian Eckert Cc: stable Link: https://patch.msgid.link/20250924134115.2667650-1-fe@dev.tdt.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 04a0cbab02c2..b9cc0b786ca6 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -40,6 +40,8 @@ #define PCI_DEVICE_ID_ACCESSIO_COM_4SM 0x10db #define PCI_DEVICE_ID_ACCESSIO_COM_8SM 0x10ea +#define PCI_DEVICE_ID_ADVANTECH_XR17V352 0x0018 + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -1622,6 +1624,12 @@ static const struct exar8250_board pbn_fastcom35x_8 = { .exit = pci_xr17v35x_exit, }; +static const struct exar8250_board pbn_adv_XR17V352 = { + .num_ports = 2, + .setup = pci_xr17v35x_setup, + .exit = pci_xr17v35x_exit, +}; + static const struct exar8250_board pbn_exar_XR17V4358 = { .num_ports = 12, .setup = pci_xr17v35x_setup, @@ -1696,6 +1704,9 @@ static const struct pci_device_id exar_pci_tbl[] = { USR_DEVICE(XR17C152, 2980, pbn_exar_XR17C15x), USR_DEVICE(XR17C152, 2981, pbn_exar_XR17C15x), + /* ADVANTECH devices */ + EXAR_DEVICE(ADVANTECH, XR17V352, pbn_adv_XR17V352), + /* Exar Corp. XR17C15[248] Dual/Quad/Octal UART */ EXAR_DEVICE(EXAR, XR17C152, pbn_exar_XR17C15x), EXAR_DEVICE(EXAR, XR17C154, pbn_exar_XR17C15x), From 1c05bf6c0262f946571a37678250193e46b1ff0f Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 6 Oct 2025 10:20:02 -0400 Subject: [PATCH 0289/1075] serial: sc16is7xx: remove useless enable of enhanced features Commit 43c51bb573aa ("sc16is7xx: make sure device is in suspend once probed") permanently enabled access to the enhanced features in sc16is7xx_probe(), and it is never disabled after that. Therefore, remove re-enable of enhanced features in sc16is7xx_set_baud(). This eliminates a potential useless read + write cycle each time the baud rate is reconfigured. Fixes: 43c51bb573aa ("sc16is7xx: make sure device is in suspend once probed") Cc: stable Signed-off-by: Hugo Villeneuve Link: https://patch.msgid.link/20251006142002.177475-1-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 1a2c4c14f6aa..c7435595dce1 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -588,13 +588,6 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) div /= prescaler; } - /* Enable enhanced features */ - sc16is7xx_efr_lock(port); - sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, - SC16IS7XX_EFR_ENABLE_BIT, - SC16IS7XX_EFR_ENABLE_BIT); - sc16is7xx_efr_unlock(port); - /* If bit MCR_CLKSEL is set, the divide by 4 prescaler is activated. */ sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, From ea9f6d316782bf36141df764634a53d085061091 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Oct 2025 12:50:36 +0200 Subject: [PATCH 0290/1075] dt-bindings: serial: sh-sci: Fix r8a78000 interrupts The SCIF instances on R-Car Gen5 have a single interrupt, just like on other R-Car SoCs. Fixes: 6ac1d60473727931 ("dt-bindings: serial: sh-sci: Document r8a78000 bindings") Cc: stable Signed-off-by: Geert Uytterhoeven Acked-by: Kuninori Morimoto Acked-by: Conor Dooley Link: https://patch.msgid.link/09bc9881b31bdb948ce8b69a2b5acf633f5505a4.1759920441.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,scif.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index e925cd4c3ac8..72483bc3274d 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -197,6 +197,7 @@ allOf: - renesas,rcar-gen2-scif - renesas,rcar-gen3-scif - renesas,rcar-gen4-scif + - renesas,rcar-gen5-scif then: properties: interrupts: From daeb4037adf7d3349b4a1fb792f4bc9824686a4b Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Sun, 19 Oct 2025 12:51:31 +0300 Subject: [PATCH 0291/1075] serial: 8250_dw: handle reset control deassert error Check the return value of reset_control_deassert() in the probe function to prevent continuing probe when reset deassertion fails. Previously, reset_control_deassert() was called without checking its return value, which could lead to probe continuing even when the device reset wasn't properly deasserted. The fix checks the return value and returns an error with dev_err_probe() if reset deassertion fails, providing better error handling and diagnostics. Fixes: acbdad8dd1ab ("serial: 8250_dw: simplify optional reset handling") Cc: stable Signed-off-by: Artem Shimko Link: https://patch.msgid.link/20251019095131.252848-1-a.shimko.dev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a53ba04d9770..710ae4d40aec 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -635,7 +635,9 @@ static int dw8250_probe(struct platform_device *pdev) if (IS_ERR(data->rst)) return PTR_ERR(data->rst); - reset_control_deassert(data->rst); + err = reset_control_deassert(data->rst); + if (err) + return dev_err_probe(dev, err, "failed to deassert resets\n"); err = devm_add_action_or_reset(dev, dw8250_reset_control_assert, data->rst); if (err) From d518314a1fa4e980a227d1b2bda1badf433cb932 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 16 Sep 2025 22:37:27 +0100 Subject: [PATCH 0292/1075] serial: 8250_mtk: Enable baud clock and manage in runtime PM Some MediaTek SoCs got a gated UART baud clock, which currently gets disabled as the clk subsystem believes it would be unused. This results in the uart freezing right after "clk: Disabling unused clocks" on those platforms. Request the baud clock to be prepared and enabled during probe, and to restore run-time power management capabilities to what it was before commit e32a83c70cf9 ("serial: 8250-mtk: modify mtk uart power and clock management") disable and unprepare the baud clock when suspending the UART, prepare and enable it again when resuming it. Fixes: e32a83c70cf9 ("serial: 8250-mtk: modify mtk uart power and clock management") Fixes: b6c7ff2693ddc ("serial: 8250_mtk: Simplify clock sequencing and runtime PM") Cc: stable Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Daniel Golle Link: https://patch.msgid.link/de5197ccc31e1dab0965cabcc11ca92e67246cf6.1758058441.git.daniel@makrotopia.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index b44de2ed7413..5875a7b9b4b1 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -435,6 +435,7 @@ static int __maybe_unused mtk8250_runtime_suspend(struct device *dev) while (serial_in(up, MTK_UART_DEBUG0)); + clk_disable_unprepare(data->uart_clk); clk_disable_unprepare(data->bus_clk); return 0; @@ -445,6 +446,7 @@ static int __maybe_unused mtk8250_runtime_resume(struct device *dev) struct mtk8250_data *data = dev_get_drvdata(dev); clk_prepare_enable(data->bus_clk); + clk_prepare_enable(data->uart_clk); return 0; } @@ -475,13 +477,13 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p, int dmacnt; #endif - data->uart_clk = devm_clk_get(&pdev->dev, "baud"); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, "baud"); if (IS_ERR(data->uart_clk)) { /* * For compatibility with older device trees try unnamed * clk when no baud clk can be found. */ - data->uart_clk = devm_clk_get(&pdev->dev, NULL); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(data->uart_clk)) { dev_warn(&pdev->dev, "Can't get uart clock\n"); return PTR_ERR(data->uart_clk); From 0e4a169d1a2b630c607416d9e3739d80e176ed67 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Tue, 21 Oct 2025 05:35:22 +0000 Subject: [PATCH 0293/1075] sched/fair: Start a cfs_rq on throttled hierarchy with PELT clock throttled Matteo reported hitting the assert_list_leaf_cfs_rq() warning from enqueue_task_fair() post commit fe8d238e646e ("sched/fair: Propagate load for throttled cfs_rq") which transitioned to using cfs_rq_pelt_clock_throttled() check for leaf cfs_rq insertions in propagate_entity_cfs_rq(). The "cfs_rq->pelt_clock_throttled" flag is used to indicate if the hierarchy has its PELT frozen. If a cfs_rq's PELT is marked frozen, all its descendants should have their PELT frozen too or weird things can happen as a result of children accumulating PELT signals when the parents have their PELT clock stopped. Another side effect of this is the loss of integrity of the leaf cfs_rq list. As debugged by Aaron, consider the following hierarchy: root(#) / \ A(#) B(*) | C <--- new cgroup | D <--- new cgroup # - Already on leaf cfs_rq list * - Throttled with PELT frozen The newly created cgroups don't have their "pelt_clock_throttled" signal synced with cgroup B. Next, the following series of events occur: 1. online_fair_sched_group() for cgroup D will call propagate_entity_cfs_rq(). (Same can happen if a throttled task is moved to cgroup C and enqueue_task_fair() returns early.) propagate_entity_cfs_rq() adds the cfs_rq of cgroup C to "rq->tmp_alone_branch" since its PELT clock is not marked throttled and cfs_rq of cgroup B is not on the list. cfs_rq of cgroup B is skipped since its PELT is throttled. root cfs_rq already exists on cfs_rq leading to list_add_leaf_cfs_rq() returning early. The cfs_rq of cgroup C is left dangling on the "rq->tmp_alone_branch". 2. A new task wakes up on cgroup A. Since the whole hierarchy is already on the leaf cfs_rq list, list_add_leaf_cfs_rq() keeps returning early without any modifications to "rq->tmp_alone_branch". The final assert_list_leaf_cfs_rq() in enqueue_task_fair() sees the dangling reference to cgroup C's cfs_rq in "rq->tmp_alone_branch". !!! Splat !!! Syncing the "pelt_clock_throttled" indicator with parent cfs_rq is not enough since the new cfs_rq is not yet enqueued on the hierarchy. A dequeue on other subtree on the throttled hierarchy can freeze the PELT clock for the parent hierarchy without setting the indicators for this newly added cfs_rq which was never enqueued. Since there are no tasks on the new hierarchy, start a cfs_rq on a throttled hierarchy with its PELT clock throttled. The first enqueue, or the distribution (whichever happens first) will unfreeze the PELT clock and queue the cfs_rq on the leaf cfs_rq list. While at it, add an assert_list_leaf_cfs_rq() in propagate_entity_cfs_rq() to catch such cases in the future. Closes: https://lore.kernel.org/lkml/58a587d694f33c2ea487c700b0d046fa@codethink.co.uk/ Fixes: e1fad12dcb66 ("sched/fair: Switch to task based throttle model") Reported-by: Matteo Martelli Suggested-by: Aaron Lu Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Aaron Lu Tested-by: Aaron Lu Tested-by: Matteo Martelli Link: https://patch.msgid.link/20251021053522.37583-1-kprateek.nayak@amd.com --- kernel/sched/fair.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cee1793e8277..25970dbbb279 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6437,6 +6437,16 @@ static void sync_throttle(struct task_group *tg, int cpu) cfs_rq->throttle_count = pcfs_rq->throttle_count; cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu)); + + /* + * It is not enough to sync the "pelt_clock_throttled" indicator + * with the parent cfs_rq when the hierarchy is not queued. + * Always join a throttled hierarchy with PELT clock throttled + * and leaf it to the first enqueue, or distribution to + * unthrottle the PELT clock. + */ + if (cfs_rq->throttle_count) + cfs_rq->pelt_clock_throttled = 1; } /* conditionally throttle active cfs_rq's from put_prev_entity() */ @@ -13187,6 +13197,8 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) if (!cfs_rq_pelt_clock_throttled(cfs_rq)) list_add_leaf_cfs_rq(cfs_rq); } + + assert_list_leaf_cfs_rq(rq_of(cfs_rq)); } #else /* !CONFIG_FAIR_GROUP_SCHED: */ static void propagate_entity_cfs_rq(struct sched_entity *se) { } From dbdf2a7feb422f9bacfd12774e624cf26f503eb0 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 20 Oct 2025 04:07:14 +0200 Subject: [PATCH 0294/1075] objtool/rust: add one more `noreturn` Rust function Between Rust 1.79 and 1.86, under `CONFIG_RUST_KERNEL_DOCTESTS=y`, `objtool` may report: rust/doctests_kernel_generated.o: warning: objtool: rust_doctest_kernel_alloc_kbox_rs_13() falls through to next function rust_doctest_kernel_alloc_kvec_rs_0() (as well as in rust_doctest_kernel_alloc_kvec_rs_0) due to calls to the `noreturn` symbol: core::option::expect_failed from code added in commits 779db37373a3 ("rust: alloc: kvec: implement AsPageIter for VVec") and 671618432f46 ("rust: alloc: kbox: implement AsPageIter for VBox"). Thus add the mangled one to the list so that `objtool` knows it is actually `noreturn`. This can be reproduced as well in other versions by tweaking the code, such as the latest stable Rust (1.90.0). Stable does not have code that triggers this, but it could have it in the future. Downstream forks could too. Thus tag it for backport. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Signed-off-by: Miguel Ojeda Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org # Needed in 6.12.y and later. Link: https://patch.msgid.link/20251020020714.2511718-1-ojeda@kernel.org --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a5770570b106..3c7ab910b189 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -217,6 +217,7 @@ static bool is_rust_noreturn(const struct symbol *func) * these come from the Rust standard library). */ return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || + str_ends_with(func->name, "_4core6option13expect_failed") || str_ends_with(func->name, "_4core6option13unwrap_failed") || str_ends_with(func->name, "_4core6result13unwrap_failed") || str_ends_with(func->name, "_4core9panicking5panic") || From 49c98f30f4021b560676a336f8a46a4f642eee2b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 Oct 2025 14:23:58 +0200 Subject: [PATCH 0295/1075] objtool: Fix failure when being compiled on x32 system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compilation failure when compiling the kernel with the x32 toolchain. In file included from check.c:16: check.c: In function ¡check_abs_references¢: /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:47:17: error: format ¡%lx¢ expects argument of type ¡long unsigned int¢, but argument 7 has type ¡u64¢ {aka ¡long long unsigned int¢} [-Werror=format=] 47 | "%s%s%s: objtool" extra ": " format "\n", \ | ^~~~~~~~~~~~~~~~~ /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:54:9: note: in expansion of macro ¡___WARN¢ 54 | ___WARN(severity, "", format, ##__VA_ARGS__) | ^~~~~~~ /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:74:27: note: in expansion of macro ¡__WARN¢ 74 | #define WARN(format, ...) __WARN(WARN_STR, format, ##__VA_ARGS__) | ^~~~~~ check.c:4713:33: note: in expansion of macro ¡WARN¢ 4713 | WARN("section %s has absolute relocation at offset 0x%lx", | ^~~~ Fixes: 0d6e4563fc03 ("objtool: Add action to check for absence of absolute relocations") Signed-off-by: Mikulas Patocka Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Link: https://patch.msgid.link/1ac32fff-2e67-5155-f570-69aad5bf5412@redhat.com --- tools/objtool/check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3c7ab910b189..620854fdaaf6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4711,8 +4711,8 @@ static int check_abs_references(struct objtool_file *file) for_each_reloc(sec->rsec, reloc) { if (arch_absolute_reloc(file->elf, reloc)) { - WARN("section %s has absolute relocation at offset 0x%lx", - sec->name, reloc_offset(reloc)); + WARN("section %s has absolute relocation at offset 0x%llx", + sec->name, (unsigned long long)reloc_offset(reloc)); ret++; } } From 3293d3d7b08872cf174bb768b890655f1b22526a Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Wed, 22 Oct 2025 15:39:52 +0800 Subject: [PATCH 0296/1075] ASoC: sdw_utils: add name_prefix for rt1321 part id This patch adds name_prefix for rt1321 part id in the codec_info_list. Signed-off-by: Shuming Fan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20251022073952.327451-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 270c66b90228..d717d4143932 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -313,6 +313,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { }, { .part_id = 0x1321, + .name_prefix = "rt1320", .dais = { { .direction = {true, false}, From 1b824134261d2db08fb6583ccbd05cb71861bd53 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 22 Oct 2025 02:41:59 +0200 Subject: [PATCH 0297/1075] spi: dt-bindings: spi-rockchip: Add RK3506 compatible The SPI controller found in the RK3506 SoC is still compatible to the original one introduced with the RK3066, so add the RK3506 compatible to the list of its variants. Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251022004200.204276-1-heiko@sntech.de Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 748faf7f7081..ce6762c92fda 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -34,6 +34,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3506-spi - rockchip,rk3528-spi - rockchip,rk3562-spi - rockchip,rk3568-spi From 119aaeed0b6729293f41ea33be05ecd27a947d48 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:40:59 +0200 Subject: [PATCH 0298/1075] of/irq: Add msi-parent check to of_msi_xlate() In some legacy platforms the MSI controller for a PCI host bridge is identified by an msi-parent property whose phandle points at an MSI controller node with no #msi-cells property, that implicitly means #msi-cells == 0. For such platforms, mapping a device ID and retrieving the MSI controller node becomes simply a matter of checking whether in the device hierarchy there is an msi-parent property pointing at an MSI controller node with such characteristics. Add a helper function to of_msi_xlate() to check the msi-parent property in addition to msi-map and retrieve the MSI controller node (with a 1:1 ID deviceID-IN<->deviceID-OUT mapping) to provide support for deviceID mapping and MSI controller node retrieval for such platforms. Fixes: 57d72196dfc8 ("irqchip/gic-v5: Add GICv5 ITS support") Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Sascha Bischoff Cc: Rob Herring Cc: Marc Zyngier Link: https://patch.msgid.link/20251021124103.198419-2-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 65c3c23255b7..321d40ec229b 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -671,6 +671,36 @@ void __init of_irq_init(const struct of_device_id *matches) } } +static int of_check_msi_parent(struct device_node *dev_node, struct device_node **msi_node) +{ + struct of_phandle_args msi_spec; + int ret; + + /* + * An msi-parent phandle with a missing or == 0 #msi-cells + * property identifies a 1:1 ID translation mapping. + * + * Set the msi controller node if the firmware matches this + * condition. + */ + ret = of_parse_phandle_with_optional_args(dev_node, "msi-parent", "#msi-cells", + 0, &msi_spec); + if (ret) + return ret; + + if ((*msi_node && *msi_node != msi_spec.np) || msi_spec.args_count != 0) + ret = -EINVAL; + + if (!ret) { + /* Return with a node reference held */ + *msi_node = msi_spec.np; + return 0; + } + of_node_put(msi_spec.np); + + return ret; +} + /** * of_msi_xlate - map a MSI ID and find relevant MSI controller node * @dev: device for which the mapping is to be done. @@ -678,7 +708,7 @@ void __init of_irq_init(const struct of_device_id *matches) * @id_in: Device ID. * * Walk up the device hierarchy looking for devices with a "msi-map" - * property. If found, apply the mapping to @id_in. + * or "msi-parent" property. If found, apply the mapping to @id_in. * If @msi_np points to a non-NULL device node pointer, only entries targeting * that node will be matched; if it points to a NULL value, it will receive the * device node of the first matching target phandle, with a reference held. @@ -692,12 +722,15 @@ u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) /* * Walk up the device parent links looking for one with a - * "msi-map" property. + * "msi-map" or an "msi-parent" property. */ - for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) + for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) { if (!of_map_id(parent_dev->of_node, id_in, "msi-map", "msi-map-mask", msi_np, &id_out)) break; + if (!of_check_msi_parent(parent_dev->of_node, msi_np)) + break; + } return id_out; } From c71af4d6d56665e04634babfc45dce3d9ab58285 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:41:00 +0200 Subject: [PATCH 0299/1075] of/irq: Fix OF node refcount in of_msi_get_domain() In of_msi_get_domain() if the iterator loop stops early because an irq_domain match is detected, an of_node_put() on the iterator node is needed to keep the OF node refcount in sync. Add it. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Rob Herring Link: https://patch.msgid.link/20251021124103.198419-3-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 321d40ec229b..ee7d5f0842e8 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -774,8 +774,10 @@ struct irq_domain *of_msi_get_domain(struct device *dev, of_for_each_phandle(&it, err, np, "msi-parent", "#msi-cells", 0) { d = irq_find_matching_host(it.node, token); - if (d) + if (d) { + of_node_put(it.node); return d; + } } return NULL; From 7458f72cc28f9eb0de811effcb5376d0ec19094a Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 17 Oct 2025 12:03:20 +0100 Subject: [PATCH 0300/1075] pmdomain: arm: scmi: Fix genpd leak on provider registration failure If of_genpd_add_provider_onecell() fails during probe, the previously created generic power domains are not removed, leading to a memory leak and potential kernel crash later in genpd_debug_add(). Add proper error handling to unwind the initialized domains before returning from probe to ensure all resources are correctly released on failure. Example crash trace observed without this fix: | Unable to handle kernel paging request at virtual address fffffffffffffc70 | CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.18.0-rc1 #405 PREEMPT | Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform | pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : genpd_debug_add+0x2c/0x160 | lr : genpd_debug_init+0x74/0x98 | Call trace: | genpd_debug_add+0x2c/0x160 (P) | genpd_debug_init+0x74/0x98 | do_one_initcall+0xd0/0x2d8 | do_initcall_level+0xa0/0x140 | do_initcalls+0x60/0xa8 | do_basic_setup+0x28/0x40 | kernel_init_freeable+0xe8/0x170 | kernel_init+0x2c/0x140 | ret_from_fork+0x10/0x20 Fixes: 898216c97ed2 ("firmware: arm_scmi: add device power domain support using genpd") Signed-off-by: Sudeep Holla Reviewed-by: Peng Fan Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/pmdomain/arm/scmi_pm_domain.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/pmdomain/arm/scmi_pm_domain.c b/drivers/pmdomain/arm/scmi_pm_domain.c index 8fe1c0a501c9..b5e2ffd5ea64 100644 --- a/drivers/pmdomain/arm/scmi_pm_domain.c +++ b/drivers/pmdomain/arm/scmi_pm_domain.c @@ -41,7 +41,7 @@ static int scmi_pd_power_off(struct generic_pm_domain *domain) static int scmi_pm_domain_probe(struct scmi_device *sdev) { - int num_domains, i; + int num_domains, i, ret; struct device *dev = &sdev->dev; struct device_node *np = dev->of_node; struct scmi_pm_domain *scmi_pd; @@ -108,9 +108,18 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev) scmi_pd_data->domains = domains; scmi_pd_data->num_domains = num_domains; + ret = of_genpd_add_provider_onecell(np, scmi_pd_data); + if (ret) + goto err_rm_genpds; + dev_set_drvdata(dev, scmi_pd_data); - return of_genpd_add_provider_onecell(np, scmi_pd_data); + return 0; +err_rm_genpds: + for (i = num_domains - 1; i >= 0; i--) + pm_genpd_remove(domains[i]); + + return ret; } static void scmi_pm_domain_remove(struct scmi_device *sdev) From b90cafb438874e1419c14dfbcaf60ec7fc6ea353 Mon Sep 17 00:00:00 2001 From: Madhur Kumar Date: Mon, 13 Oct 2025 15:21:49 +0530 Subject: [PATCH 0301/1075] selftests/cachestat: add tmpshmcstat file to .gitignore Add the tmpshmcstat file to .gitignore to avoid accidentally staging the build artifact Link: https://lore.kernel.org/r/20251013095149.1386628-1-madhurkumar004@gmail.com Signed-off-by: Madhur Kumar Signed-off-by: Shuah Khan --- tools/testing/selftests/cachestat/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/cachestat/.gitignore b/tools/testing/selftests/cachestat/.gitignore index d6c30b43a4bb..abbb13b6e96b 100644 --- a/tools/testing/selftests/cachestat/.gitignore +++ b/tools/testing/selftests/cachestat/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only test_cachestat +tmpshmcstat From 920aa3a7705a061cb3004572d8b7932b54463dbf Mon Sep 17 00:00:00 2001 From: Sidharth Seela Date: Mon, 29 Sep 2025 17:24:06 +0530 Subject: [PATCH 0302/1075] selftests: cachestat: Fix warning on declaration under label Fix warning caused from declaration under a case label. The proper way is to declare variable at the beginning of the function. The warning came from running clang using LLVM=1; and is as follows: -test_cachestat.c:260:3: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 260 | char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, | Link: https://lore.kernel.org/r/20250929115405.25695-2-sidharthseela@gmail.com Signed-off-by: Sidharth Seela Reviewed-by: SeongJae Park Reviewed-by: wang lian Reviewed-by: Dev Jain Acked-by: Shuah Khan Acked-by: Nhat Pham Signed-off-by: Shuah Khan --- tools/testing/selftests/cachestat/test_cachestat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c index c952640f163b..ab838bcb9ec5 100644 --- a/tools/testing/selftests/cachestat/test_cachestat.c +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -226,7 +226,7 @@ bool run_cachestat_test(enum file_type type) int syscall_ret; size_t compute_len = PS * 512; struct cachestat_range cs_range = { PS, compute_len }; - char *filename = "tmpshmcstat"; + char *filename = "tmpshmcstat", *map; struct cachestat cs; bool ret = true; int fd; @@ -257,7 +257,7 @@ bool run_cachestat_test(enum file_type type) } break; case FILE_MMAP: - char *map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + map = mmap(NULL, filesize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { From 89205c60c0fc96b73567a2e9fe27ee3f59d01193 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Wed, 22 Oct 2025 16:17:26 +0200 Subject: [PATCH 0303/1075] USB: serial: option: add Quectel RG255C Add support for Quectel RG255C devices to complement commit 5c964c8a97c1 ("net: usb: qmi_wwan: add Quectel RG255C"). The composition is DM / NMEA / AT / QMI. T: Bus=01 Lev=02 Prnt=99 Port=01 Cnt=02 Dev#=110 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0316 Rev= 5.15 S: Manufacturer=Quectel S: Product=RG255C-GL S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ed1328648a73..3d6ebe2692a9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 +#define QUECTEL_PRODUCT_RG255C 0x0316 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1271,6 +1272,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x40) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 4c8cf6bd28d6fea23819f082ddc8063fd6fa963a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Oct 2025 10:33:31 +0200 Subject: [PATCH 0304/1075] block: require LBA dma_alignment when using PI The block layer PI generation / verification code expects the bio_vecs to have at least LBA size (or more correctly integrity internal) granularity. With the direct I/O alignment relaxation in 2022, user space can now feed bios with less alignment than that, leading to scribbling outside the PI buffers. Apparently this wasn't noticed so far because none of the tests generate such buffers, but since 851c4c96db00 ("xfs: implement XFS_IOC_DIOINFO in terms of vfs_getattr"), xfstests generic/013 by default generates such I/O now that the relaxed alignment is advertised by the XFS_IOC_DIOINFO ioctl. Fix this by increasing the required alignment when using PI, although handling arbitrary alignment in the long run would be even nicer. Fixes: bf8d08532bc1 ("iomap: add support for dma aligned direct-io") Fixes: b1a000d3b8ec ("block: relax direct io memory alignment") Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-settings.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 54cffaae4df4..d74b13ec8e54 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -184,6 +184,16 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) if (!bi->interval_exp) bi->interval_exp = ilog2(lim->logical_block_size); + /* + * The PI generation / validation helpers do not expect intervals to + * straddle multiple bio_vecs. Enforce alignment so that those are + * never generated, and that each buffer is aligned as expected. + */ + if (bi->csum_type) { + lim->dma_alignment = max(lim->dma_alignment, + (1U << bi->interval_exp) - 1); + } + return 0; } From bf5570590a981d0659d0808d2d4bcda21b27a2a5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 21 Oct 2025 20:38:22 +0100 Subject: [PATCH 0305/1075] MIPS: Malta: Fix keyboard resource preventing i8042 driver from registering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPS Malta platform code registers the PCI southbridge legacy port I/O PS/2 keyboard range as a standard resource marked as busy. It prevents the i8042 driver from registering as it fails to claim the resource in a call to i8042_platform_init(). Consequently PS/2 keyboard and mouse devices cannot be used with this platform. Fix the issue by removing the busy marker from the standard reservation, making the driver register successfully: serio: i8042 KBD port at 0x60,0x64 irq 1 serio: i8042 AUX port at 0x60,0x64 irq 12 and the resource show up as expected among the legacy devices: 00000000-00ffffff : MSC PCI I/O 00000000-0000001f : dma1 00000020-00000021 : pic1 00000040-0000005f : timer 00000060-0000006f : keyboard 00000060-0000006f : i8042 00000070-00000077 : rtc0 00000080-0000008f : dma page reg 000000a0-000000a1 : pic2 000000c0-000000df : dma2 [...] If the i8042 driver has not been configured, then the standard resource will remain there preventing any conflicting dynamic assignment of this PCI port I/O address range. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Acked-by: Thomas Bogendoerfer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/alpine.DEB.2.21.2510211919240.8377@angie.orcam.me.uk --- arch/mips/mti-malta/malta-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 3a2836e9d856..2a3fd8bbf6c2 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -47,7 +47,7 @@ static struct resource standard_io_resources[] = { .name = "keyboard", .start = 0x60, .end = 0x6f, - .flags = IORESOURCE_IO | IORESOURCE_BUSY + .flags = IORESOURCE_IO }, { .name = "dma page reg", From 1d5d1663619d5a367be538f6a1be1cf5bd2cf494 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 21 Oct 2025 20:38:29 +0100 Subject: [PATCH 0306/1075] MIPS: Malta: Fix PCI southbridge legacy resource reservations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covering the PCI southbridge legacy port I/O range with a northbridge resource reservation prevents MIPS Malta platform code from claiming its standard legacy resources. This is because request_resource() calls cause a clash with the previous reservation and consequently fail. Change to using insert_resource() so as to prevent the clash, switching the legacy reservations from: 00000000-00ffffff : MSC PCI I/O 00000020-00000021 : pic1 00000070-00000077 : rtc0 000000a0-000000a1 : pic2 [...] to: 00000000-00ffffff : MSC PCI I/O 00000000-0000001f : dma1 00000020-00000021 : pic1 00000040-0000005f : timer 00000060-0000006f : keyboard 00000070-00000077 : rtc0 00000080-0000008f : dma page reg 000000a0-000000a1 : pic2 000000c0-000000df : dma2 [...] Fixes: ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Acked-by: Thomas Bogendoerfer Cc: stable@vger.kernel.org # v6.18+ Link: https://patch.msgid.link/alpine.DEB.2.21.2510212001250.8377@angie.orcam.me.uk --- arch/mips/mti-malta/malta-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 2a3fd8bbf6c2..816570514c37 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -213,7 +213,7 @@ void __init plat_mem_setup(void) /* Request I/O space for devices used on the Malta board. */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, standard_io_resources+i); + insert_resource(&ioport_resource, standard_io_resources + i); /* * Enable DMA channel 4 (cascade channel) in the PIIX4 south bridge. From f294a5fd34db564108a16166d891634a3cb25c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 17 Oct 2025 14:09:03 +0300 Subject: [PATCH 0307/1075] MIPS: Malta: Use pcibios_align_resource() to block io range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Maciej W. Rozycki , the mips_pcibios_init() for malta adjusts root bus IO resource start address to prevent interfering with PIIX4 I/O cycle decoding. Adjusting lower bound leaves PIIX4 IO resources outside of the root bus resource and assign_fixed_resource_on_bus() does not link the resources into the resource tree. Prior to commit ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") the arch specific pcibios_enable_resources() did not check if the resources were assigned which diverges from what PCI core checks, effectively hiding the PIIX4 IO resources were not properly within the resource tree. After starting to use pcibios_enable_resources() from PCI core, enabling PIIX4 fails: ata_piix 0000:00:0a.1: BAR 0 [io 0x01f0-0x01f7]: not claimed; can't enable device ata_piix 0000:00:0a.1: probe with driver ata_piix failed with error -22 MIPS PCI code already has support for enforcing lower bounds using PCIBIOS_MIN_IO in pcibios_align_resource() without altering the IO window start address itself. Make malta PCI code too to use PCIBIOS_MIN_IO. Fixes: ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") Reported-by: Guenter Roeck Link: https://lore.kernel.org/linux-pci/9085ab12-1559-4462-9b18-f03dcb9a4088@roeck-us.net/ Suggested-by: Maciej W. Rozycki Link: https://lore.kernel.org/linux-pci/alpine.DEB.2.21.2510132229120.39634@angie.orcam.me.uk/ Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Tested-by: Guenter Roeck Tested-by: Maciej W. Rozycki Acked-by: Thomas Bogendoerfer Link: https://patch.msgid.link/20251017110903.1973-1-ilpo.jarvinen@linux.intel.com --- arch/mips/pci/pci-malta.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/pci/pci-malta.c b/arch/mips/pci/pci-malta.c index 6aefdf20ca05..2e35aeba45bc 100644 --- a/arch/mips/pci/pci-malta.c +++ b/arch/mips/pci/pci-malta.c @@ -230,8 +230,7 @@ void __init mips_pcibios_init(void) } /* PIIX4 ACPI starts at 0x1000 */ - if (controller->io_resource->start < 0x00001000UL) - controller->io_resource->start = 0x00001000UL; + PCIBIOS_MIN_IO = 0x1000; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; From 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Oct 2025 07:16:08 -0600 Subject: [PATCH 0308/1075] io_uring/sqpoll: switch away from getrusage() for CPU accounting getrusage() does a lot more than what the SQPOLL accounting needs, the latter only cares about (and uses) the stime. Rather than do a full RUSAGE_SELF summation, just query the used stime instead. Cc: stable@vger.kernel.org Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 8 ++++---- io_uring/sqpoll.c | 32 ++++++++++++++++++-------------- io_uring/sqpoll.h | 1 + 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index ff3364531c77..294c75a8a3bd 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; - struct rusage sq_usage; unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; unsigned int sq_head = READ_ONCE(r->sq.head); unsigned int sq_tail = READ_ONCE(r->sq.tail); @@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) * thread termination. */ if (tsk) { + u64 usec; + get_task_struct(tsk); rcu_read_unlock(); - getrusage(tsk, RUSAGE_SELF, &sq_usage); + usec = io_sq_cpu_usec(tsk); put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 - + sq_usage.ru_stime.tv_usec); + sq_total_time = usec; sq_work_time = sq->work_time; } else { rcu_read_unlock(); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index a3f11349ce06..2b816fdb9866 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -169,6 +170,20 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd) return READ_ONCE(sqd->state); } +u64 io_sq_cpu_usec(struct task_struct *tsk) +{ + u64 utime, stime; + + task_cputime_adjusted(tsk, &utime, &stime); + do_div(stime, 1000); + return stime; +} + +static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) +{ + sqd->work_time += io_sq_cpu_usec(current) - usec; +} + static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) { unsigned int to_submit; @@ -255,26 +270,15 @@ static bool io_sq_tw_pending(struct llist_node *retry_list) return retry_list || !llist_empty(&tctx->task_list); } -static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start) -{ - struct rusage end; - - getrusage(current, RUSAGE_SELF, &end); - end.ru_stime.tv_sec -= start->ru_stime.tv_sec; - end.ru_stime.tv_usec -= start->ru_stime.tv_usec; - - sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000; -} - static int io_sq_thread(void *data) { struct llist_node *retry_list = NULL; struct io_sq_data *sqd = data; struct io_ring_ctx *ctx; - struct rusage start; unsigned long timeout = 0; char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); + u64 start; /* offload context creation failed, just exit */ if (!current->io_uring) { @@ -317,7 +321,7 @@ static int io_sq_thread(void *data) } cap_entries = !list_is_singular(&sqd->ctx_list); - getrusage(current, RUSAGE_SELF, &start); + start = io_sq_cpu_usec(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { int ret = __io_sq_thread(ctx, cap_entries); @@ -333,7 +337,7 @@ static int io_sq_thread(void *data) if (sqt_spin || !time_after(jiffies, timeout)) { if (sqt_spin) { - io_sq_update_worktime(sqd, &start); + io_sq_update_worktime(sqd, start); timeout = jiffies + sqd->sq_thread_idle; } if (unlikely(need_resched())) { diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index b83dcdec9765..fd2f6f29b516 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); +u64 io_sq_cpu_usec(struct task_struct *tsk); static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) { From a94e0657269c5b8e1a90b17aa2c048b3d276e16d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Oct 2025 11:44:39 -0600 Subject: [PATCH 0309/1075] io_uring/sqpoll: be smarter on when to update the stime usage The current approach is a bit naive, and hence calls the time querying way too often. Only start the "doing work" timer when there's actual work to do, and then use that information to terminate (and account) the work time once done. This greatly reduces the frequency of these calls, when they cannot have changed anyway. Running a basic random reader that is setup to use SQPOLL, a profile before this change shows these as the top cycle consumers: + 32.60% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime_adjusted + 19.97% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime + 12.20% io_uring io_uring [.] submitter_uring_fn + 4.13% iou-sqp-1074 [kernel.kallsyms] [k] getrusage + 2.45% iou-sqp-1074 [kernel.kallsyms] [k] io_submit_sqes + 2.18% iou-sqp-1074 [kernel.kallsyms] [k] __pi_memset_generic + 2.09% iou-sqp-1074 [kernel.kallsyms] [k] cputime_adjust and after this change, top of profile looks as follows: + 36.23% io_uring io_uring [.] submitter_uring_fn + 23.26% iou-sqp-819 [kernel.kallsyms] [k] io_sq_thread + 10.14% iou-sqp-819 [kernel.kallsyms] [k] io_sq_tw + 6.52% iou-sqp-819 [kernel.kallsyms] [k] tctx_task_work_run + 4.82% iou-sqp-819 [kernel.kallsyms] [k] nvme_submit_cmds.part.0 + 2.91% iou-sqp-819 [kernel.kallsyms] [k] io_submit_sqes [...] 0.02% iou-sqp-819 [kernel.kallsyms] [k] cputime_adjust where it's spending the cycles on things that actually matter. Reported-by: Fengnan Chang Cc: stable@vger.kernel.org Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 2b816fdb9866..e22f072c7d5f 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -170,6 +170,11 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd) return READ_ONCE(sqd->state); } +struct io_sq_time { + bool started; + u64 usec; +}; + u64 io_sq_cpu_usec(struct task_struct *tsk) { u64 utime, stime; @@ -179,12 +184,24 @@ u64 io_sq_cpu_usec(struct task_struct *tsk) return stime; } -static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) +static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist) { - sqd->work_time += io_sq_cpu_usec(current) - usec; + if (!ist->started) + return; + ist->started = false; + sqd->work_time += io_sq_cpu_usec(current) - ist->usec; } -static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) +static void io_sq_start_worktime(struct io_sq_time *ist) +{ + if (ist->started) + return; + ist->started = true; + ist->usec = io_sq_cpu_usec(current); +} + +static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd, + bool cap_entries, struct io_sq_time *ist) { unsigned int to_submit; int ret = 0; @@ -197,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (to_submit || !wq_list_empty(&ctx->iopoll_list)) { const struct cred *creds = NULL; + io_sq_start_worktime(ist); + if (ctx->sq_creds != current_cred()) creds = override_creds(ctx->sq_creds); @@ -278,7 +297,6 @@ static int io_sq_thread(void *data) unsigned long timeout = 0; char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); - u64 start; /* offload context creation failed, just exit */ if (!current->io_uring) { @@ -313,6 +331,7 @@ static int io_sq_thread(void *data) mutex_lock(&sqd->lock); while (1) { bool cap_entries, sqt_spin = false; + struct io_sq_time ist = { }; if (io_sqd_events_pending(sqd) || signal_pending(current)) { if (io_sqd_handle_event(sqd)) @@ -321,9 +340,8 @@ static int io_sq_thread(void *data) } cap_entries = !list_is_singular(&sqd->ctx_list); - start = io_sq_cpu_usec(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - int ret = __io_sq_thread(ctx, cap_entries); + int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist); if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) sqt_spin = true; @@ -331,15 +349,18 @@ static int io_sq_thread(void *data) if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE)) sqt_spin = true; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - if (io_napi(ctx)) + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + if (io_napi(ctx)) { + io_sq_start_worktime(&ist); io_napi_sqpoll_busy_poll(ctx); + } + } + + io_sq_update_worktime(sqd, &ist); if (sqt_spin || !time_after(jiffies, timeout)) { - if (sqt_spin) { - io_sq_update_worktime(sqd, start); + if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; - } if (unlikely(need_resched())) { mutex_unlock(&sqd->lock); cond_resched(); From 915651b7c9473fd23d0e56fe227a97eda483cf7c Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Tue, 21 Oct 2025 22:59:30 +0530 Subject: [PATCH 0310/1075] io_uring: Fix code indentation error Fix the indentation to ensure consistent code style and improve readability and to fix the errors: ERROR: code indent should use tabs where possible + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);$ ERROR: code indent should use tabs where possible +^I^I^I struct io_big_cqe *big_cqe)$ Tested by running the /scripts/checkpatch.pl Signed-off-by: Ranganath V N Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/net.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 820ef0527666..296667ba712c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags) } static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe, - struct io_big_cqe *big_cqe) + struct io_big_cqe *big_cqe) { struct io_overflow_cqe *ocqe; diff --git a/io_uring/net.c b/io_uring/net.c index f99b90c762fc..a95cc9ca2a4d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; if (sr->flags & IORING_SEND_VECTORIZED) - return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } From 060aa0b0c26c9e88cfc1433fab3d0145700e8247 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 21 Oct 2025 13:29:44 -0700 Subject: [PATCH 0311/1075] io_uring zcrx: add MAINTAINERS entry Same as [1] but also with netdev@ as an additional mailing list. io_uring zero copy receive is of particular interest to netdev participants too, given its tight integration to netdev core. With this updated entry, folks running get_maintainer.pl on patches that touch io_uring/zcrx.* will know to send it to netdev@ as well. Note that this doesn't mean all changes require explicit acks from netdev; this is purely for wider visibility and for other contributors to know where to send patches. [1]: https://lore.kernel.org/io-uring/989528e611b51d71fb712691ebfb76d2059ba561.1755461246.git.asml.silence@gmail.com/ Signed-off-by: David Wei Acked-by: Jakub Kicinski Reviewed-by: Mina Almasry [axboe: use correct io_uring tree URL] Signed-off-by: Jens Axboe --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..2ed9efa2d2a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13110,6 +13110,15 @@ F: include/uapi/linux/io_uring.h F: include/uapi/linux/io_uring/ F: io_uring/ +IO_URING ZCRX +M: Pavel Begunkov +L: io-uring@vger.kernel.org +L: netdev@vger.kernel.org +T: git https://github.com/isilence/linux.git zcrx/for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git +S: Maintained +F: io_uring/zcrx.* + IPMI SUBSYSTEM M: Corey Minyard L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers) From 1af424b15401d2be789c4dc2279889514e7c5c94 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Oct 2025 20:34:05 -0700 Subject: [PATCH 0312/1075] lib/crypto: poly1305: Restore dependency of arch code on !KMSAN Restore the dependency of the architecture-optimized Poly1305 code on !KMSAN. It was dropped by commit b646b782e522 ("lib/crypto: poly1305: Consolidate into single module"). Unlike the other hash algorithms in lib/crypto/ (e.g., SHA-512), the way the architecture-optimized Poly1305 code is integrated results in assembly code initializing memory, for several different architectures. Thus, it generates false positive KMSAN warnings. These could be suppressed with kmsan_unpoison_memory(), but it would be needed in quite a few places. For now let's just restore the dependency on !KMSAN. Note: this should have been caught by running poly1305_kunit with CONFIG_KMSAN=y, which I did. However, due to an unrelated KMSAN bug (https://lore.kernel.org/r/20251022030213.GA35717@sol/), KMSAN currently isn't working reliably. Thus, the warning wasn't noticed until later. Fixes: b646b782e522 ("lib/crypto: poly1305: Consolidate into single module") Reported-by: syzbot+01fcd39a0d90cdb0e3df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/68f6a48f.050a0220.91a22.0452.GAE@google.com/ Reported-by: Pei Xiao Closes: https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.1761026343.git.xiaopei01@kylinos.cn/ Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251022033405.64761-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index eea17e36a22b..8886055e938f 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -97,7 +97,7 @@ config CRYPTO_LIB_POLY1305 config CRYPTO_LIB_POLY1305_ARCH bool - depends on CRYPTO_LIB_POLY1305 && !UML + depends on CRYPTO_LIB_POLY1305 && !UML && !KMSAN default y if ARM default y if ARM64 && KERNEL_MODE_NEON default y if MIPS From 20594cd104abaaabb676c7a2915b150ae5ff093d Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Mon, 6 Oct 2025 14:17:06 +0530 Subject: [PATCH 0313/1075] ACPI: button: Call input_free_device() on failing input device registration Make acpi_button_add() call input_free_device() when input_register_device() fails as required according to the documentation of the latter. Fixes: 0d51157dfaac ("ACPI: button: Eliminate the driver notify callback") Signed-off-by: Kaushlendra Kumar Cc: 6.5+ # 6.5+ [ rjw: Subject and changelog rewrite, Fixes: tag ] Link: https://patch.msgid.link/20251006084706.971855-1-kaushlendra.kumar@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/button.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index 0a7026040188..3c6dd9b4ba0a 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -619,8 +619,10 @@ static int acpi_button_add(struct acpi_device *device) input_set_drvdata(input, device); error = input_register_device(input); - if (error) + if (error) { + input_free_device(input); goto err_remove_fs; + } switch (device->device_type) { case ACPI_BUS_TYPE_POWER_BUTTON: From 60ad1de8e59278656092f56e87189ec82f078d12 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 22 Oct 2025 09:59:24 +0200 Subject: [PATCH 0314/1075] nvmet-auth: update sc_c in host response The target code should set the sc_c bit in calculating the host response based on the status of the 'concat' setting, otherwise we'll get an authentication mismatch for hosts setting that bit correctly. Fixes: 7e091add9c43 ("nvme-auth: update sc_c in host response") Signed-off-by: Hannes Reinecke Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b340380f3892..ceba21684e82 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -298,7 +298,7 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, const char *hash_name; u8 *challenge = req->sq->dhchap_c1; struct nvme_dhchap_key *transformed_key; - u8 buf[4]; + u8 buf[4], sc_c = ctrl->concat ? 1 : 0; int ret; hash_name = nvme_auth_hmac_name(ctrl->shash_id); @@ -367,13 +367,14 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, ret = crypto_shash_update(shash, buf, 2); if (ret) goto out; - memset(buf, 0, 4); + *buf = sc_c; ret = crypto_shash_update(shash, buf, 1); if (ret) goto out; ret = crypto_shash_update(shash, "HostHost", 8); if (ret) goto out; + memset(buf, 0, 4); ret = crypto_shash_update(shash, ctrl->hostnqn, strlen(ctrl->hostnqn)); if (ret) goto out; From 0bd73ae09ba1b73137d0830b21820d24700e09b1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 11:55:02 +0200 Subject: [PATCH 0315/1075] smb: server: allocate enough space for RW WRs and ib_drain_qp() Make use of rdma_rw_mr_factor() to calculate the number of rw credits and the number of pages per RDMA RW operation. We get the same numbers for iWarp connections, tested with siw.ko and irdma.ko (in iWarp mode). siw: CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 256 CIFS: max_sgl_rd=0, max_sge_rd=1 CIFS: responder_resources=32 max_frmr_depth=256 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 3276800 max_qp_wr 32768 ksmbd: max_fast_reg_page_list_len = 256, max_sgl_rd=0, max_sge_rd=1 ksmbd: device reporting max_cqe 3276800 max_qp_wr 32768 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048 ksmbd: Info: rdma_send_wr 27 + max_send_wr 256 = 283 irdma (in iWarp mode): CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144 CIFS: max_sgl_rd=0, max_sge_rd=13 CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13 ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048 ksmbd: rdma_send_wr 27 + max_send_wr 256 = 283 This means that we get the different correct numbers for ROCE, tested with rdma_rxe.ko and irdma.ko (in RoCEv2 mode). rxe: CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 512 CIFS: max_sgl_rd=0, max_sge_rd=32 CIFS: responder_resources=32 max_frmr_depth=512 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 32767 max_qp_wr 1048576 ksmbd: max_fast_reg_page_list_len = 512, max_sgl_rd=0, max_sge_rd=32 ksmbd: device reporting max_cqe 32767 max_qp_wr 1048576 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 65, num_pages = 32, maxpages=2048 ksmbd: rdma_send_wr 65 + max_send_wr 256 = 321 irdma (in RoCEv2 mode): CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144, CIFS: max_sgl_rd=0, max_sge_rd=13 CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13 ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256, ksmbd: New sc->rw_io.credits: max = 159, num_pages = 13, maxpages=2048 ksmbd: rdma_send_wr 159 + max_send_wr 256 = 415 And rely on rdma_rw_init_qp() to setup ib_mr_pool_init() for RW MRs. ib_mr_pool_destroy() will be called by rdma_rw_cleanup_mrs(). It seems the code was implemented before the rdma_rw_* layer was fully established in the kernel. While there also add additional space for ib_drain_qp(). This should make sure ib_post_send() will never fail because the submission queue is full. Fixes: ddbdc861e37c ("ksmbd: smbd: introduce read/write credits for RDMA read/write") Fixes: 4c564f03e23b ("smb: server: make use of common smbdirect_socket") Fixes: 177368b99243 ("smb: server: make use of common smbdirect_socket_parameters") Fixes: 95475d8886bd ("smb: server: make use smbdirect_socket.rw_io.credits") Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 233 ++++++++++++++++++++------------- 1 file changed, 142 insertions(+), 91 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a201c5871a77..19b51205dc8c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -471,7 +471,6 @@ static void free_transport(struct smb_direct_transport *t) if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); - ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); sc->ib.qp = NULL; rdma_destroy_qp(sc->rdma.cm_id); } @@ -1871,20 +1870,11 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) return ret; } -static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) -{ - return min_t(unsigned int, - sc->ib.dev->attrs.max_fast_reg_page_list_len, - 256); -} - -static int smb_direct_init_params(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static int smb_direct_init_params(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_device *device = sc->ib.dev; - int max_send_sges, max_rw_wrs, max_send_wrs; - unsigned int max_sge_per_wr, wrs_per_credit; + int max_send_sges; + unsigned int maxpages; /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, * SMB2 response could be mapped. @@ -1895,67 +1885,18 @@ static int smb_direct_init_params(struct smbdirect_socket *sc, return -EINVAL; } - /* Calculate the number of work requests for RDMA R/W. - * The maximum number of pages which can be registered - * with one Memory region can be transferred with one - * R/W credit. And at least 4 work requests for each credit - * are needed for MR registration, RDMA R/W, local & remote - * MR invalidation. - */ - sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); - sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, - (sc->rw_io.credits.num_pages - 1) * - PAGE_SIZE); - - max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, - device->attrs.max_sge_rd); - max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, - max_send_sges); - wrs_per_credit = max_t(unsigned int, 4, - DIV_ROUND_UP(sc->rw_io.credits.num_pages, - max_sge_per_wr) + 1); - max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; - - max_send_wrs = sp->send_credit_target + max_rw_wrs; - if (max_send_wrs > device->attrs.max_cqe || - max_send_wrs > device->attrs.max_qp_wr) { - pr_err("consider lowering send_credit_target = %d\n", - sp->send_credit_target); - pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (sp->recv_credit_max > device->attrs.max_cqe || - sp->recv_credit_max > device->attrs.max_qp_wr) { - pr_err("consider lowering receive_credit_max = %d\n", - sp->recv_credit_max); - pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("warning: device max_send_sge = %d too small\n", - device->attrs.max_send_sge); - return -EINVAL; - } - if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { - pr_err("warning: device max_recv_sge = %d too small\n", - device->attrs.max_recv_sge); - return -EINVAL; - } + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, + sc->rdma.cm_id->port_num, + maxpages); + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); + /* add one extra in order to handle unaligned pages */ + sc->rw_io.credits.max += 1; sc->recv_io.credits.target = 1; atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); - cap->max_send_wr = max_send_wrs; - cap->max_recv_wr = sp->recv_credit_max; - cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - cap->max_inline_data = 0; - cap->max_rdma_ctxs = sc->rw_io.credits.max; return 0; } @@ -2029,13 +1970,129 @@ static int smb_direct_create_pools(struct smbdirect_socket *sc) return -ENOMEM; } -static int smb_direct_create_qpair(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) +{ + /* + * This could be split out of rdma_rw_init_qp() + * and be a helper function next to rdma_rw_mr_factor() + * + * We can't check unlikely(rdma_rw_force_mr) here, + * but that is most likely 0 anyway. + */ + u32 factor; + + WARN_ON_ONCE(attr->port_num == 0); + + /* + * Each context needs at least one RDMA READ or WRITE WR. + * + * For some hardware we might need more, eventually we should ask the + * HCA driver for a multiplier here. + */ + factor = 1; + + /* + * If the device needs MRs to perform RDMA READ or WRITE operations, + * we'll need two additional MRs for the registrations and the + * invalidation. + */ + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) + factor += 2; /* inv + reg */ + + return factor * attr->cap.max_rdma_ctxs; +} + +static int smb_direct_create_qpair(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; - int pages_per_rw; + u32 max_send_wr; + u32 rdma_send_wr; + + /* + * Note that {rdma,ib}_create_qp() will call + * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0. + * It will adjust cap->max_send_wr to the required + * number of additional WRs for the RDMA RW operations. + * It will cap cap->max_send_wr to the device limit. + * + * +1 for ib_drain_qp + */ + qp_cap.max_send_wr = sp->send_credit_target + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + qp_cap.max_inline_data = 0; + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; + + /* + * Find out the number of max_send_wr + * after rdma_rw_init_qp() adjusted it. + * + * We only do it on a temporary variable, + * as rdma_create_qp() will trigger + * rdma_rw_init_qp() again. + */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.cap = qp_cap; + qp_attr.port_num = sc->rdma.cm_id->port_num; + rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; + + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_send_wr %d\n", + qp_cap.max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d\n", + sp->send_credit_target); + return -EINVAL; + } + + if (qp_cap.max_rdma_ctxs && + (max_send_wr >= sc->ib.dev->attrs.max_cqe || + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", + sp->send_credit_target, qp_cap.max_rdma_ctxs); + return -EINVAL; + } + + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_recv_wr %d\n", + qp_cap.max_recv_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering receive_credit_max = %d\n", + sp->recv_credit_max); + return -EINVAL; + } + + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); + return -EINVAL; + } sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { @@ -2046,8 +2103,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target + - cap->max_rdma_ctxs, + max_send_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.send_cq)) { pr_err("Can't create RDMA send CQ\n"); @@ -2057,7 +2113,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, + qp_cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); @@ -2066,10 +2122,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, goto err; } + /* + * We reset completely here! + * As the above use was just temporary + * to calc max_send_wr and rdma_send_wr. + * + * rdma_create_qp() will trigger rdma_rw_init_qp() + * again if max_rdma_ctxs is not 0. + */ memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smb_direct_qpair_handler; qp_attr.qp_context = sc; - qp_attr.cap = *cap; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -2085,18 +2149,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, sc->ib.qp = sc->rdma.cm_id->qp; sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; - if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { - ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, - sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, - sc->rw_io.credits.num_pages, 0); - if (ret) { - pr_err("failed to init mr pool count %zu pages %zu\n", - sc->rw_io.credits.max, sc->rw_io.credits.num_pages); - goto err; - } - } - return 0; err: if (sc->ib.qp) { @@ -2183,10 +2235,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t) static int smb_direct_connect(struct smbdirect_socket *sc) { - struct ib_qp_cap qp_cap; int ret; - ret = smb_direct_init_params(sc, &qp_cap); + ret = smb_direct_init_params(sc); if (ret) { pr_err("Can't configure RDMA parameters\n"); return ret; @@ -2198,7 +2249,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } - ret = smb_direct_create_qpair(sc, &qp_cap); + ret = smb_direct_create_qpair(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret; From 68335cbcddcd586b59820e6d484652ad62343112 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:35:58 +0200 Subject: [PATCH 0316/1075] smb: smbdirect: introduce smbdirect_socket.send_io.lcredits.* This will be used to implement a logic in order to make sure we don't overflow the send submission queue for ib_post_send(). We will initialize the local credits with the fixed sp->send_credit_target value, which matches the reserved slots in the submission queue for ib_post_send(). We will be a local credit first and then wait for a remote credit, if we managed to get both we are allowed to post an IB_WR_SEND[_WITH_INV]. The local credit is given back to the pool when we get the local ib_post_send() completion, while remote credits are granted by the peer. From reading the git history of the linux smbdirect implementations in client and server) it was seen that a peer granted more credits than we requested. I guess that only happened because of bugs in our implementation which was active as client and server. I guess Windows won't do that. So the local credits make sure we only use the amount of credits we asked for. The client already has some logic for this based on smbdirect_socket.send_io.pending.count, but that counts in the order direction and makes it complex it share common logic for various credits classes. That logic will be replaced soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 361db7f9f623..ee5a90d691c8 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -142,7 +142,15 @@ struct smbdirect_socket { } mem; /* - * The credit state for the send side + * The local credit state for ib_post_send() + */ + struct { + atomic_t count; + wait_queue_head_t wait_queue; + } lcredits; + + /* + * The remote credit state for the send side */ struct { atomic_t count; @@ -337,6 +345,9 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); disable_delayed_work_sync(&sc->idle.timer_work); + atomic_set(&sc->send_io.lcredits.count, 0); + init_waitqueue_head(&sc->send_io.lcredits.wait_queue); + atomic_set(&sc->send_io.credits.count, 0); init_waitqueue_head(&sc->send_io.credits.wait_queue); From 8059c64049587dac8af37ad82e2034b64c2d9fee Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:35:59 +0200 Subject: [PATCH 0317/1075] smb: server: smb_direct_disconnect_rdma_connection() already wakes all waiters on error There's no need to care about pending or credit counters when we already disconnecting. And all related wait_event conditions already check for broken connections too. This will simplify the code and makes the following changes simpler. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 19b51205dc8c..9dabaf74db31 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -987,8 +987,6 @@ static int smb_direct_post_send(struct smbdirect_socket *sc, ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); smb_direct_disconnect_rdma_connection(sc); } return ret; @@ -1037,8 +1035,6 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, send_ctx->need_invalidate_rkey, send_ctx->remote_key); } else { - atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count); - wake_up(&sc->send_io.credits.wait_queue); list_for_each_entry_safe(first, last, &send_ctx->msg_list, sibling_list) { smb_direct_free_sendmsg(sc, first); From a90227462a14f5bdf7dfd4b73c2b75c54834efce Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:00 +0200 Subject: [PATCH 0318/1075] smb: server: simplify sibling_list handling in smb_direct_flush_send_list/send_done We have a list handling that is much easier to understand: 1. Before smb_direct_flush_send_list() is called all struct smbdirect_send_io messages are part of send_ctx->msg_list 2. Before smb_direct_flush_send_list() calls smb_direct_post_send() we remove the last element in send_ctx->msg_list and move all others into last->sibling_list. As only last has IB_SEND_SIGNALED and gets a completion vis send_done(). 3. send_done() has an easy way to free all others in sendmsg->sibling_list (if there are any). And use list_for_each_entry_safe() instead of a complex custom logic. This will help us to share send_done() in common code soon, as it will work fine for the client too, where last->sibling_list is currently always an empty list. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 60 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 9dabaf74db31..2aa8e4d4c912 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -523,6 +523,12 @@ static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, { int i; + /* + * The list needs to be empty! + * The caller should take care of it. + */ + WARN_ON_ONCE(!list_empty(&msg->sibling_list)); + if (msg->num_sge > 0) { ib_dma_unmap_single(sc->ib.dev, msg->sge[0].addr, msg->sge[0].length, @@ -908,9 +914,8 @@ static void smb_direct_post_recv_credits(struct work_struct *work) static void send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbdirect_send_io *sendmsg, *sibling; + struct smbdirect_send_io *sendmsg, *sibling, *next; struct smbdirect_socket *sc; - struct list_head *pos, *prev, *end; sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); sc = sendmsg->socket; @@ -919,27 +924,26 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) ib_wc_status_msg(wc->status), wc->status, wc->opcode); + /* + * Free possible siblings and then the main send_io + */ + list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); + } + /* Note this frees wc->wr_cqe, but not wc */ + smb_direct_free_sendmsg(sc, sendmsg); + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { pr_err("Send error. status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); smb_direct_disconnect_rdma_connection(sc); + return; } if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); - - /* iterate and free the list of messages in reverse. the list's head - * is invalid. - */ - for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next; - prev != end; pos = prev, prev = prev->prev) { - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); - } - - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); } static int manage_credits_prior_sending(struct smbdirect_socket *sc) @@ -1029,17 +1033,29 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, last->wr.send_flags = IB_SEND_SIGNALED; last->wr.wr_cqe = &last->cqe; + /* + * Remove last from send_ctx->msg_list + * and splice the rest of send_ctx->msg_list + * to last->sibling_list. + * + * send_ctx->msg_list is a valid empty list + * at the end. + */ + list_del_init(&last->sibling_list); + list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list); + send_ctx->wr_cnt = 0; + ret = smb_direct_post_send(sc, &first->wr); - if (!ret) { - smb_direct_send_ctx_init(send_ctx, - send_ctx->need_invalidate_rkey, - send_ctx->remote_key); - } else { - list_for_each_entry_safe(first, last, &send_ctx->msg_list, - sibling_list) { - smb_direct_free_sendmsg(sc, first); + if (ret) { + struct smbdirect_send_io *sibling, *next; + + list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); } + smb_direct_free_sendmsg(sc, last); } + return ret; } From 0158e864cca0c98bdc2866f1eb30c66fa21e250c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:01 +0200 Subject: [PATCH 0319/1075] smb: server: make use of smbdirect_socket.send_io.lcredits.* This introduces logic to prevent on overflow of the send submission queue with ib_post_send() easier. As we first get a local credit and then a remote credit before we mark us as pending. From reading the git history of the linux smbdirect implementations in client and server) it was seen that a peer granted more credits than we requested. I guess that only happened because of bugs in our implementation which was active as client and server. I guess Windows won't do that. So the local credits make sure we only use the amount of credits we asked for. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 42 ++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 2aa8e4d4c912..8aaa950a9449 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -219,6 +219,7 @@ static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); wake_up_all(&sc->recv_io.reassembly.wait_queue); @@ -916,6 +917,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_send_io *sendmsg, *sibling, *next; struct smbdirect_socket *sc; + int lcredits = 0; sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); sc = sendmsg->socket; @@ -930,9 +932,11 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); smb_direct_free_sendmsg(sc, sibling); + lcredits += 1; } /* Note this frees wc->wr_cqe, but not wc */ smb_direct_free_sendmsg(sc, sendmsg); + lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { pr_err("Send error. status='%s (%d)', opcode=%d\n", @@ -942,6 +946,9 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); } @@ -1081,6 +1088,23 @@ static int wait_for_credits(struct smbdirect_socket *sc, } while (true); } +static int wait_for_send_lcredit(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx) +{ + if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { + int ret; + + ret = smb_direct_flush_send_list(sc, send_ctx, false); + if (ret) + return ret; + } + + return wait_for_credits(sc, + &sc->send_io.lcredits.wait_queue, + &sc->send_io.lcredits.count, + 1); +} + static int wait_for_send_credits(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx) { @@ -1268,9 +1292,13 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, int data_length; struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; + ret = wait_for_send_lcredit(sc, send_ctx); + if (ret) + goto lcredit_failed; + ret = wait_for_send_credits(sc, send_ctx); if (ret) - return ret; + goto credit_failed; data_length = 0; for (i = 0; i < niov; i++) @@ -1278,10 +1306,8 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, ret = smb_direct_create_header(sc, data_length, remaining_data_length, &msg); - if (ret) { - atomic_inc(&sc->send_io.credits.count); - return ret; - } + if (ret) + goto header_failed; for (i = 0; i < niov; i++) { struct ib_sge *sge; @@ -1319,7 +1345,11 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, return 0; err: smb_direct_free_sendmsg(sc, msg); +header_failed: atomic_inc(&sc->send_io.credits.count); +credit_failed: + atomic_inc(&sc->send_io.lcredits.count); +lcredit_failed: return ret; } @@ -1897,6 +1927,8 @@ static int smb_direct_init_params(struct smbdirect_socket *sc) return -EINVAL; } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, sc->rdma.cm_id->port_num, From 123111ea6226c5302cc192028e7ae923c44e1382 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:02 +0200 Subject: [PATCH 0320/1075] smb: client: make use of smbdirect_socket.send_io.lcredits.* This makes the logic to prevent on overflow of the send submission queue with ib_post_send() easier. As we first get a local credit and then a remote credit before we mark us as pending. For now we'll keep the logic around smbdirect_socket.send_io.pending.*, but that will likely change or be removed completely. The server will get a similar logic soon, so we'll be able to share the send code in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 49e2df3ad1f0..f2da694336ee 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -172,6 +172,7 @@ static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.dec_wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); @@ -495,6 +496,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbdirect_send_io *request = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); struct smbdirect_socket *sc = request->socket; + int lcredits = 0; log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", request, ib_wc_status_msg(wc->status)); @@ -504,22 +506,24 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); + mempool_free(request, sc->send_io.mem.pool); + lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { if (wc->status != IB_WC_WR_FLUSH_ERR) log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->opcode); - mempool_free(request, sc->send_io.mem.pool); smbd_disconnect_rdma_connection(sc); return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); wake_up(&sc->send_io.pending.dec_wait_queue); - - mempool_free(request, sc->send_io.mem.pool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -567,6 +571,7 @@ static bool process_negotiation_response( log_rdma_event(ERR, "error: credits_granted==0\n"); return false; } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { @@ -1114,6 +1119,24 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, struct smbdirect_data_transfer *packet; int new_credits = 0; +wait_lcredit: + /* Wait for local send credits */ + rc = wait_event_interruptible(sc->send_io.lcredits.wait_queue, + atomic_read(&sc->send_io.lcredits.count) > 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (rc) + goto err_wait_lcredit; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_lcredit; + } + if (unlikely(atomic_dec_return(&sc->send_io.lcredits.count) < 0)) { + atomic_inc(&sc->send_io.lcredits.count); + goto wait_lcredit; + } + wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(sc->send_io.credits.wait_queue, @@ -1132,23 +1155,6 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, goto wait_credit; } -wait_send_queue: - wait_event(sc->send_io.pending.dec_wait_queue, - atomic_read(&sc->send_io.pending.count) < sp->send_credit_target || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); - rc = -EAGAIN; - goto err_wait_send_queue; - } - - if (unlikely(atomic_inc_return(&sc->send_io.pending.count) > - sp->send_credit_target)) { - atomic_dec(&sc->send_io.pending.count); - goto wait_send_queue; - } - request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) { rc = -ENOMEM; @@ -1229,10 +1235,21 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, le32_to_cpu(packet->data_length), le32_to_cpu(packet->remaining_data_length)); + /* + * Now that we got a local and a remote credit + * we add us as pending + */ + atomic_inc(&sc->send_io.pending.count); + rc = smbd_post_send(sc, request); if (!rc) return 0; + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); + + wake_up(&sc->send_io.pending.dec_wait_queue); + err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) @@ -1246,14 +1263,14 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, atomic_sub(new_credits, &sc->recv_io.credits.count); err_alloc: - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); - -err_wait_send_queue: - /* roll back send credits and pending */ atomic_inc(&sc->send_io.credits.count); + wake_up(&sc->send_io.credits.wait_queue); err_wait_credit: + atomic_inc(&sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + +err_wait_lcredit: return rc; } From 5370c31e84b0e0999c7b5ff949f4e104def35584 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 17 Oct 2025 16:18:29 +0100 Subject: [PATCH 0321/1075] net: ravb: Enforce descriptor type ordering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure the TX descriptor type fields are published in a safe order so the DMA engine never begins processing a descriptor chain before all descriptor fields are fully initialised. For multi-descriptor transmits the driver writes DT_FEND into the last descriptor and DT_FSTART into the first. The DMA engine begins processing when it observes DT_FSTART. Move the dma_wmb() barrier so it executes immediately after DT_FEND and immediately before writing DT_FSTART (and before DT_FSINGLE in the single-descriptor case). This guarantees that all prior CPU writes to the descriptor memory are visible to the device before DT_FSTART is seen. This avoids a situation where compiler/CPU reordering could publish DT_FSTART ahead of DT_FEND or other descriptor fields, allowing the DMA to start on a partially initialised chain and causing corrupted transmissions or TX timeouts. Such a failure was observed on RZ/G2L with an RT kernel as transmit queue timeouts and device resets. Fixes: 2f45d1902acf ("ravb: minimize TX data copying") Cc: stable@vger.kernel.org Co-developed-by: Fabrizio Castro Signed-off-by: Fabrizio Castro Signed-off-by: Lad Prabhakar Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20251017151830.171062-4-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 9d3bd65b85ff..044ee83c63bb 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2211,13 +2211,25 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_tx_timestamp(skb); } - /* Descriptor type must be set after all the above writes */ - dma_wmb(); + if (num_tx_desc > 1) { desc->die_dt = DT_FEND; desc--; + /* When using multi-descriptors, DT_FEND needs to get written + * before DT_FSTART, but the compiler may reorder the memory + * writes in an attempt to optimize the code. + * Use a dma_wmb() barrier to make sure DT_FEND and DT_FSTART + * are written exactly in the order shown in the code. + * This is particularly important for cases where the DMA engine + * is already running when we are running this code. If the DMA + * sees DT_FSTART without the corresponding DT_FEND it will enter + * an error condition. + */ + dma_wmb(); desc->die_dt = DT_FSTART; } else { + /* Descriptor type must be set after all the above writes */ + dma_wmb(); desc->die_dt = DT_FSINGLE; } ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); From 706136c5723626fcde8dd8f598a4dcd251e24927 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Fri, 17 Oct 2025 16:18:30 +0100 Subject: [PATCH 0322/1075] net: ravb: Ensure memory write completes before ringing TX doorbell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a final dma_wmb() barrier before triggering the transmit request (TCCR_TSRQ) to ensure all descriptor and buffer writes are visible to the DMA engine. According to the hardware manual, a read-back operation is required before writing to the doorbell register to guarantee completion of previous writes. Instead of performing a dummy read, a dma_wmb() is used to both enforce the same ordering semantics on the CPU side and also to ensure completion of writes. Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") Cc: stable@vger.kernel.org Co-developed-by: Fabrizio Castro Signed-off-by: Fabrizio Castro Signed-off-by: Lad Prabhakar Reviewed-by: Niklas Söderlund Link: https://patch.msgid.link/20251017151830.171062-5-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/ravb_main.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 044ee83c63bb..e2d7ce1a85e8 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -2232,6 +2232,14 @@ static netdev_tx_t ravb_start_xmit(struct sk_buff *skb, struct net_device *ndev) dma_wmb(); desc->die_dt = DT_FSINGLE; } + + /* Before ringing the doorbell we need to make sure that the latest + * writes have been committed to memory, otherwise it could delay + * things until the doorbell is rang again. + * This is in replacement of the read operation mentioned in the HW + * manuals. + */ + dma_wmb(); ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q); priv->cur_tx[q] += num_tx_desc; From 5b2ff4873aeab972f919d5aea11c51393322bf58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 09:40:02 +0100 Subject: [PATCH 0323/1075] cifs: Fix TCP_Server_Info::credits to be signed Fix TCP_Server_Info::credits to be signed, just as echo_credits and oplock_credits are. This also fixes what ought to get at least a compilation warning if not an outright error in *get_credits_field() as a pointer to the unsigned server->credits field is passed back as a pointer to a signed int. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Acked-by: Pavel Shilovskiy Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 16a00a61fd2c..00982aa9428f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -732,7 +732,7 @@ struct TCP_Server_Info { bool nosharesock; bool tcp_nodelay; bool terminate; - unsigned int credits; /* send no more requests at once */ + int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ From a767957e7a83f9e742be196aa52a48de8ac5a7e4 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 21 Oct 2025 18:24:56 +0000 Subject: [PATCH 0324/1075] ptp: ocp: Fix typo using index 1 instead of i in SMA initialization loop In ptp_ocp_sma_fb_init(), the code mistakenly used bp->sma[1] instead of bp->sma[i] inside a for-loop, which caused only SMA[1] to have its DIRECTION_CAN_CHANGE capability cleared. This led to inconsistent capability flags across SMA pins. Fixes: 09eeb3aecc6c ("ptp_ocp: implement DPLL ops") Signed-off-by: Jiasheng Jiang Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251021182456.9729-1-jiashengjiangcool@gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 794ec6e71990..a5c363252986 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -2548,7 +2548,7 @@ ptp_ocp_sma_fb_init(struct ptp_ocp *bp) for (i = 0; i < OCP_SMA_NUM; i++) { bp->sma[i].fixed_fcn = true; bp->sma[i].fixed_dir = true; - bp->sma[1].dpll_prop.capabilities &= + bp->sma[i].dpll_prop.capabilities &= ~DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE; } return; From 441f0647f7673e0e64d4910ef61a5fb8f16bfb82 Mon Sep 17 00:00:00 2001 From: Alexey Simakov Date: Tue, 21 Oct 2025 16:00:36 +0300 Subject: [PATCH 0325/1075] sctp: avoid NULL dereference when chunk data buffer is missing chunk->skb pointer is dereferenced in the if-block where it's supposed to be NULL only. chunk->skb can only be NULL if chunk->head_skb is not. Check for frag_list instead and do it just before replacing chunk->skb. We're sure that otherwise chunk->skb is non-NULL because of outer if() condition. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Alexey Simakov Acked-by: Marcelo Ricardo Leitner Link: https://patch.msgid.link/20251021130034.6333-1-bigalex934@gmail.com Signed-off-by: Jakub Kicinski --- net/sctp/inqueue.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 5c1652181805..f5a7d5a38755 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -169,13 +169,14 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue) chunk->head_skb = chunk->skb; /* skbs with "cover letter" */ - if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) + if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) { + if (WARN_ON(!skb_shinfo(chunk->skb)->frag_list)) { + __SCTP_INC_STATS(dev_net(chunk->skb->dev), + SCTP_MIB_IN_PKT_DISCARDS); + sctp_chunk_free(chunk); + goto next_chunk; + } chunk->skb = skb_shinfo(chunk->skb)->frag_list; - - if (WARN_ON(!chunk->skb)) { - __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); - sctp_chunk_free(chunk); - goto next_chunk; } } From c0178eec8884231a5ae0592b9fce827bccb77e86 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 20 Oct 2025 15:55:33 +0200 Subject: [PATCH 0326/1075] net: hsr: prevent creation of HSR device with slaves from another netns HSR/PRP driver does not handle correctly having slaves/interlink devices in a different net namespace. Currently, it is possible to create a HSR link in a different net namespace than the slaves/interlink with the following command: ip link add hsr0 netns hsr-ns type hsr slave1 eth1 slave2 eth2 As there is no use-case on supporting this scenario, enforce that HSR device link matches netns defined by IFLA_LINK_NETNSID. The iproute2 command mentioned above will throw the following error: Error: hsr: HSR slaves/interlink must be on the same net namespace than HSR link. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Fernando Fernandez Mancera Link: https://patch.msgid.link/20251020135533.9373-1-fmancera@suse.de Signed-off-by: Jakub Kicinski --- net/hsr/hsr_netlink.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index b120470246cc..c96b63adf96f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -34,12 +34,18 @@ static int hsr_newlink(struct net_device *dev, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); + struct net_device *link[2], *interlink = NULL; struct nlattr **data = params->data; enum hsr_version proto_version; unsigned char multicast_spec; u8 proto = HSR_PROTOCOL_HSR; - struct net_device *link[2], *interlink = NULL; + if (!net_eq(link_net, dev_net(dev))) { + NL_SET_ERR_MSG_MOD(extack, + "HSR slaves/interlink must be on the same net namespace than HSR link"); + return -EINVAL; + } + if (!data) { NL_SET_ERR_MSG_MOD(extack, "No slave devices specified"); return -EINVAL; From 0194b65ab571430130a11f8500894bb88f7af2c0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 21 Oct 2025 08:15:05 -0700 Subject: [PATCH 0327/1075] nvme-pci: use blk_map_iter for p2p metadata The dma_map_bvec helper doesn't work for p2p data, so use the same blk_map_iter method that sgl uses for this memory type. Reported-by: Leon Romanovsky Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c916176bd9f0..72fb675a696f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1042,7 +1042,7 @@ static blk_status_t nvme_map_data(struct request *req) return nvme_pci_setup_data_prp(req, &iter); } -static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) +static blk_status_t nvme_pci_setup_meta_iter(struct request *req) { struct nvme_queue *nvmeq = req->mq_hctx->driver_data; unsigned int entries = req->nr_integrity_segments; @@ -1072,8 +1072,12 @@ static blk_status_t nvme_pci_setup_meta_sgls(struct request *req) * descriptor provides an explicit length, so we're relying on that * mechanism to catch any misunderstandings between the application and * device. + * + * P2P DMA also needs to use the blk_dma_iter method, so mptr setup + * leverages this routine when that happens. */ - if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) { + if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl) || + (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD))) { iod->cmd.common.metadata = cpu_to_le64(iter.addr); iod->meta_total_len = iter.len; iod->meta_dma = iter.addr; @@ -1114,6 +1118,9 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct request *req) struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct bio_vec bv = rq_integrity_vec(req); + if (is_pci_p2pdma_page(bv.bv_page)) + return nvme_pci_setup_meta_iter(req); + iod->meta_dma = dma_map_bvec(nvmeq->dev->dev, &bv, rq_dma_dir(req), 0); if (dma_mapping_error(nvmeq->dev->dev, iod->meta_dma)) return BLK_STS_IOERR; @@ -1128,7 +1135,7 @@ static blk_status_t nvme_map_metadata(struct request *req) if ((iod->cmd.common.flags & NVME_CMD_SGL_METABUF) && nvme_pci_metadata_use_sgls(req)) - return nvme_pci_setup_meta_sgls(req); + return nvme_pci_setup_meta_iter(req); return nvme_pci_setup_meta_mptr(req); } From 3ac2939bc4341ac28700a2ed0c345ba7e7bdb6fd Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 17 Oct 2025 14:32:54 +0200 Subject: [PATCH 0328/1075] crypto: s390/phmac - Do not modify the req->nbytes value The phmac implementation used the req->nbytes field on combined operations (finup, digest) to track the state: with req->nbytes > 0 the update needs to be processed, while req->nbytes == 0 means to do the final operation. For this purpose the req->nbytes field was set to 0 after successful update operation. However, aead uses the req->nbytes field after a successful hash operation to determine the amount of data to en/decrypt. So an implementation must not modify the nbytes field. Fixed by a slight rework on the phmac implementation. There is now a new field async_op in the request context which tracks the (asynch) operation to process. So the 'state' via req->nbytes is not needed any more and now this field is untouched and may be evaluated even after a request is processed by the phmac implementation. Fixes: cbbc675506cc ("crypto: s390 - New s390 specific protected key hash phmac") Reported-by: Ingo Franzki Signed-off-by: Harald Freudenberger Tested-by: Ingo Franzki Reviewed-by: Ingo Franzki Reviewed-by: Holger Dengler Signed-off-by: Herbert Xu --- arch/s390/crypto/phmac_s390.c | 52 +++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c index 7ecfdc4fba2d..89f3e6d8fd89 100644 --- a/arch/s390/crypto/phmac_s390.c +++ b/arch/s390/crypto/phmac_s390.c @@ -169,11 +169,18 @@ struct kmac_sha2_ctx { u64 buflen[2]; }; +enum async_op { + OP_NOP = 0, + OP_UPDATE, + OP_FINAL, + OP_FINUP, +}; + /* phmac request context */ struct phmac_req_ctx { struct hash_walk_helper hwh; struct kmac_sha2_ctx kmac_ctx; - bool final; + enum async_op async_op; }; /* @@ -610,6 +617,7 @@ static int phmac_update(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { + req_ctx->async_op = OP_UPDATE; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -647,8 +655,7 @@ static int phmac_final(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req->nbytes = 0; - req_ctx->final = true; + req_ctx->async_op = OP_FINAL; atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -676,13 +683,16 @@ static int phmac_finup(struct ahash_request *req) if (rc) goto out; + req_ctx->async_op = OP_FINUP; + /* Try synchronous operations if no active engine usage */ if (!atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_update(req, false); if (rc == 0) - req->nbytes = 0; + req_ctx->async_op = OP_FINAL; } - if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) { + if (!rc && req_ctx->async_op == OP_FINAL && + !atomic_read(&tfm_ctx->via_engine_ctr)) { rc = phmac_kmac_final(req, false); if (rc == 0) goto out; @@ -694,7 +704,7 @@ static int phmac_finup(struct ahash_request *req) * using engine to serialize requests. */ if (rc == 0 || rc == -EKEYEXPIRED) { - req_ctx->final = true; + /* req->async_op has been set to either OP_FINUP or OP_FINAL */ atomic_inc(&tfm_ctx->via_engine_ctr); rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); if (rc != -EINPROGRESS) @@ -855,15 +865,16 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) /* * Three kinds of requests come in here: - * update when req->nbytes > 0 and req_ctx->final is false - * final when req->nbytes = 0 and req_ctx->final is true - * finup when req->nbytes > 0 and req_ctx->final is true - * For update and finup the hwh walk needs to be prepared and - * up to date but the actual nr of bytes in req->nbytes may be - * any non zero number. For final there is no hwh walk needed. + * 1. req->async_op == OP_UPDATE with req->nbytes > 0 + * 2. req->async_op == OP_FINUP with req->nbytes > 0 + * 3. req->async_op == OP_FINAL + * For update and finup the hwh walk has already been prepared + * by the caller. For final there is no hwh walk needed. */ - if (req->nbytes) { + switch (req_ctx->async_op) { + case OP_UPDATE: + case OP_FINUP: rc = phmac_kmac_update(req, true); if (rc == -EKEYEXPIRED) { /* @@ -880,10 +891,11 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) hwh_advance(hwh, rc); goto out; } - req->nbytes = 0; - } - - if (req_ctx->final) { + if (req_ctx->async_op == OP_UPDATE) + break; + req_ctx->async_op = OP_FINAL; + fallthrough; + case OP_FINAL: rc = phmac_kmac_final(req, true); if (rc == -EKEYEXPIRED) { /* @@ -897,10 +909,14 @@ static int phmac_do_one_request(struct crypto_engine *engine, void *areq) cond_resched(); return -ENOSPC; } + break; + default: + /* unknown/unsupported/unimplemented asynch op */ + return -EOPNOTSUPP; } out: - if (rc || req_ctx->final) + if (rc || req_ctx->async_op == OP_FINAL) memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); pr_debug("request complete with rc=%d\n", rc); local_bh_disable(); From 3c9bf72cc1ced1297b235f9422d62b613a3fdae9 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Mon, 20 Oct 2025 18:11:09 +0800 Subject: [PATCH 0329/1075] crypto: aspeed - fix double free caused by devm The clock obtained via devm_clk_get_enabled() is automatically managed by devres and will be disabled and freed on driver detach. Manually calling clk_disable_unprepare() in error path and remove function causes double free. Remove the manual clock cleanup in both aspeed_acry_probe()'s error path and aspeed_acry_remove(). Fixes: 2f1cf4e50c95 ("crypto: aspeed - Add ACRY RSA driver") Signed-off-by: Haotian Zhang Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-acry.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-acry.c b/drivers/crypto/aspeed/aspeed-acry.c index 8d1c79aaca07..5993bcba9716 100644 --- a/drivers/crypto/aspeed/aspeed-acry.c +++ b/drivers/crypto/aspeed/aspeed-acry.c @@ -787,7 +787,6 @@ static int aspeed_acry_probe(struct platform_device *pdev) err_engine_rsa_start: crypto_engine_exit(acry_dev->crypt_engine_rsa); clk_exit: - clk_disable_unprepare(acry_dev->clk); return rc; } @@ -799,7 +798,6 @@ static void aspeed_acry_remove(struct platform_device *pdev) aspeed_acry_unregister(acry_dev); crypto_engine_exit(acry_dev->crypt_engine_rsa); tasklet_kill(&acry_dev->done_task); - clk_disable_unprepare(acry_dev->clk); } MODULE_DEVICE_TABLE(of, aspeed_acry_of_matches); From e4a77f9c85a528b3289c1d9570d6d73a7b5f847b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Oct 2025 15:37:15 +0200 Subject: [PATCH 0330/1075] gpiolib: acpi: Make set debounce errors non fatal Commit 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO") adds a gpio_set_debounce_timeout() call to acpi_find_gpio() and makes acpi_find_gpio() fail if this fails. But gpio_set_debounce_timeout() failing is a somewhat normal occurrence, since not all debounce values are supported on all GPIO/pinctrl chips. Making this an error for example break getting the card-detect GPIO for the micro-sd slot found on many Bay Trail tablets, breaking support for the micro-sd slot on these tablets. acpi_request_own_gpiod() already treats gpio_set_debounce_timeout() failures as non-fatal, just warning about them. Add a acpi_gpio_set_debounce_timeout() helper which wraps gpio_set_debounce_timeout() and warns on failures and replace both existing gpio_set_debounce_timeout() calls with the helper. Since the helper only warns on failures this fixes the card-detect issue. Fixes: 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO") Cc: stable@vger.kernel.org Cc: Mario Limonciello Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Link: https://lore.kernel.org/stable/20250920201200.20611-1-hansg%40kernel.org Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi-core.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 284e762d92c4..67c4c38afb86 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -291,6 +291,19 @@ acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity) return GPIOD_ASIS; } +static void acpi_gpio_set_debounce_timeout(struct gpio_desc *desc, + unsigned int acpi_debounce) +{ + int ret; + + /* ACPI uses hundredths of milliseconds units */ + acpi_debounce *= 10; + ret = gpio_set_debounce_timeout(desc, acpi_debounce); + if (ret) + gpiod_warn(desc, "Failed to set debounce-timeout %u: %d\n", + acpi_debounce, ret); +} + static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, struct acpi_resource_gpio *agpio, unsigned int index, @@ -300,18 +313,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity); unsigned int pin = agpio->pin_table[index]; struct gpio_desc *desc; - int ret; desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags); if (IS_ERR(desc)) return desc; - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10); - if (ret) - dev_warn(chip->parent, - "Failed to set debounce-timeout for pin 0x%04X, err %d\n", - pin, ret); + acpi_gpio_set_debounce_timeout(desc, agpio->debounce_timeout); return desc; } @@ -944,7 +951,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, bool can_fallback = acpi_can_fallback_to_crs(adev, con_id); struct acpi_gpio_info info = {}; struct gpio_desc *desc; - int ret; desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info); if (IS_ERR(desc)) @@ -959,10 +965,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, acpi_gpio_update_gpiod_flags(dflags, &info); acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info); - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, info.debounce * 10); - if (ret) - return ERR_PTR(ret); + acpi_gpio_set_debounce_timeout(desc, info.debounce); return desc; } From b1055678a0160b2952c322ad1b61805562698f99 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Oct 2025 08:39:58 +0200 Subject: [PATCH 0331/1075] gpiolib: acpi: Use %pe when passing an error pointer to dev_err() One of the coccinelle recipe suggests to use %pe when we deal with an error pointer. Do it so. Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202510231350.calxvXIm-lkp@intel.com/ Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 67c4c38afb86..d441c1236d8c 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -382,8 +382,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->parent, - "Failed to request GPIO for pin 0x%04X, err %ld\n", - pin, PTR_ERR(desc)); + "Failed to request GPIO for pin 0x%04X, err %pe\n", + pin, desc); return AE_OK; } From 5c56bf214af85ca042bf97f8584aab2151035840 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Thu, 23 Oct 2025 11:32:01 +0800 Subject: [PATCH 0332/1075] mtd: rawnand: cadence: fix DMA device NULL pointer dereference The DMA device pointer `dma_dev` was being dereferenced before ensuring that `cdns_ctrl->dmac` is properly initialized. Move the assignment of `dma_dev` after successfully acquiring the DMA channel to ensure the pointer is valid before use. Fixes: d76d22b5096c ("mtd: rawnand: cadence: use dma_map_resource for sdma address") Cc: stable@vger.kernel.org Signed-off-by: Niravkumar L Rabara Signed-off-by: Miquel Raynal --- drivers/mtd/nand/raw/cadence-nand-controller.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 6667eea95597..32ed38b89394 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2871,7 +2871,7 @@ cadence_nand_irq_cleanup(int irqnum, struct cdns_nand_ctrl *cdns_ctrl) static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) { dma_cap_mask_t mask; - struct dma_device *dma_dev = cdns_ctrl->dmac->device; + struct dma_device *dma_dev; int ret; cdns_ctrl->cdma_desc = dma_alloc_coherent(cdns_ctrl->dev, @@ -2915,6 +2915,7 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) } } + dma_dev = cdns_ctrl->dmac->device; cdns_ctrl->io.iova_dma = dma_map_resource(dma_dev->dev, cdns_ctrl->io.dma, cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); From 72ed55b4c335703c203b942972558173e1e5ddee Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 22 Oct 2025 21:11:01 -0300 Subject: [PATCH 0333/1075] smb: client: get rid of d_drop() in cifs_do_rename() There is no need to force a lookup by unhashing the moved dentry after successfully renaming the file on server. The file metadata will be re-fetched from server, if necessary, in the next call to ->d_revalidate() anyways. Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: stable@vger.kernel.org Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 098a79b7a959..cac355364e43 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2484,11 +2484,8 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ do_rename_exit: - if (rc == 0) { + if (rc == 0) d_move(from_dentry, to_dentry); - /* Force a new lookup */ - d_drop(from_dentry); - } cifs_put_tlink(tlink); return rc; } From 4b1d7f62225a2fd024b2df5675515557169f17e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Oct 2025 18:10:08 +0100 Subject: [PATCH 0334/1075] cifs: Call the calc_signature functions directly As the SMB1 and SMB2/3 calc_signature functions are called from separate sign and verify paths, just call them directly rather than using a function pointer. The SMB3 calc_signature then jumps to the SMB2 variant if necessary. Signed-off-by: David Howells Acked-by: Enzo Matsumiya cc: Paulo Alcantara cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 -- fs/smb/client/smb2ops.c | 4 ---- fs/smb/client/smb2proto.h | 6 ------ fs/smb/client/smb2transport.c | 18 +++++++++--------- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 00982aa9428f..203e2aaa3c25 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -534,8 +534,6 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); - int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, - bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 95cd484cfbba..0f9130ef2e7d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5446,7 +5446,6 @@ struct smb_version_operations smb20_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb2_is_read_op, .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5550,7 +5549,6 @@ struct smb_version_operations smb21_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb21_is_read_op, .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5660,7 +5658,6 @@ struct smb_version_operations smb30_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb30signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, @@ -5777,7 +5774,6 @@ struct smb_version_operations smb311_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb311signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 6eb86d134abc..5241daaae543 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -39,12 +39,6 @@ extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); -extern int smb2_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); -extern int smb3_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index ad6068e17a2a..6a9b80385b86 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -209,9 +209,9 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) return tcon; } -int +static int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; @@ -465,9 +465,9 @@ generate_smb311signingkey(struct cifs_ses *ses, return generate_smb3signingkey(ses, server, &triplet); } -int +static int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; @@ -477,6 +477,9 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; + if (server->vals->protocol_id <= SMB21_PROT_ID) + return smb2_calc_signature(rqst, server, allocate_crypto); + rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (unlikely(rc)) { cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__); @@ -547,7 +550,6 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, static int smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; struct smb2_hdr *shdr; struct smb2_sess_setup_req *ssr; bool is_binding; @@ -574,9 +576,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) return 0; } - rc = server->ops->calc_signature(rqst, server, false); - - return rc; + return smb3_calc_signature(rqst, server, false); } int @@ -612,7 +612,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE); - rc = server->ops->calc_signature(rqst, server, true); + rc = smb3_calc_signature(rqst, server, true); if (rc) return rc; From 64c9471aa9ded2440bf182b1c71d3f93f80b2f85 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 10:16:07 +0100 Subject: [PATCH 0335/1075] cifs: #include cifsglob.h before trace.h to allow structs in tracepoints Make cifs #include cifsglob.h in advance of #including trace.h so that the structures defined in cifsglob.h can be accessed directly by the cifs tracepoints rather than the callers having to manually pass in the bits and pieces. This should allow the tracepoints to be made more efficient to use as well as easier to read in the code. Signed-off-by: David Howells cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 1 + fs/smb/client/trace.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 4976be2c47c1..fb1813cbe0eb 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -9,6 +9,7 @@ #define _CIFSPROTO_H #include #include +#include "cifsglob.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" diff --git a/fs/smb/client/trace.c b/fs/smb/client/trace.c index 465483787193..16b0e719731f 100644 --- a/fs/smb/client/trace.c +++ b/fs/smb/client/trace.c @@ -4,5 +4,6 @@ * * Author(s): Steve French */ +#include "cifsglob.h" #define CREATE_TRACE_POINTS #include "trace.h" From 79816d4b9e9b9bb03d5d871c04c97b1bce102b14 Mon Sep 17 00:00:00 2001 From: Samuel Wu Date: Wed, 22 Oct 2025 22:28:30 +0000 Subject: [PATCH 0336/1075] Revert "PM: sleep: Make pm_wakeup_clear() call more clear" This reverts commit 56a232d93cea0ba14da5e3157830330756a45b4c. The above commit changed the position of pm_wakeup_clear() for the suspend call path, but other call paths with references to freeze_processes() were not updated. This means that other call paths, such as hibernate(), will not have pm_wakeup_clear() called. Suggested-by: Saravana Kannan Signed-off-by: Samuel Wu [ rjw: Changelog edits ] Link: https://patch.msgid.link/20251022222830.634086-1-wusamuel@google.com Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 1 + kernel/power/suspend.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 8ff68ebaa1e0..dc0dfc349f22 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,6 +132,7 @@ int freeze_processes(void) if (!pm_freezing) static_branch_inc(&freezer_active); + pm_wakeup_clear(0); pm_freezing = true; error = try_to_freeze_tasks(true); if (!error) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4bb4686c1c08..b4ca17c2fecf 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -595,7 +595,6 @@ static int enter_state(suspend_state_t state) } pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]); - pm_wakeup_clear(0); pm_suspend_clear_flags(); error = suspend_prepare(state); if (error) From 10843e1492e474c02b91314963161731fa92af91 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 21 Oct 2025 13:09:33 +0800 Subject: [PATCH 0337/1075] net: bonding: fix possible peer notify event loss or dup issue If the send_peer_notif counter and the peer event notify are not synchronized. It may cause problems such as the loss or dup of peer notify event. Before this patch: - If should_notify_peers is true and the lock for send_peer_notif-- fails, peer event may be sent again in next mii_monitor loop, because should_notify_peers is still true. - If should_notify_peers is true and the lock for send_peer_notif-- succeeded, but the lock for peer event fails, the peer event will be lost. This patch locks the RTNL for send_peer_notif, events, and commit simultaneously. Fixes: 07a4ddec3ce9 ("bonding: add an option to specify a delay between peer notifications") Cc: Jay Vosburgh Cc: Andrew Lunn Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Hangbin Liu Cc: Nikolay Aleksandrov Cc: Vincent Bernat Cc: Signed-off-by: Tonghao Zhang Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20251021050933.46412-1-tonghao@bamaicloud.com Signed-off-by: Paolo Abeni --- drivers/net/bonding/bond_main.c | 40 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 67fdcbdd2764..e95e593cd12d 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2874,7 +2874,7 @@ static void bond_mii_monitor(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mii_work.work); - bool should_notify_peers = false; + bool should_notify_peers; bool commit; unsigned long delay; struct slave *slave; @@ -2886,30 +2886,33 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm; rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); commit = !!bond_miimon_inspect(bond); - if (bond->send_peer_notif) { - rcu_read_unlock(); - if (rtnl_trylock()) { - bond->send_peer_notif--; - rtnl_unlock(); - } - } else { - rcu_read_unlock(); - } - if (commit) { + rcu_read_unlock(); + + if (commit || bond->send_peer_notif) { /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { delay = 1; - should_notify_peers = false; goto re_arm; } - bond_for_each_slave(bond, slave, iter) { - bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); + if (commit) { + bond_for_each_slave(bond, slave, iter) { + bond_commit_link_state(slave, + BOND_SLAVE_NOTIFY_LATER); + } + bond_miimon_commit(bond); + } + + if (bond->send_peer_notif) { + bond->send_peer_notif--; + if (should_notify_peers) + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, + bond->dev); } - bond_miimon_commit(bond); rtnl_unlock(); /* might sleep, hold no other locks */ } @@ -2917,13 +2920,6 @@ static void bond_mii_monitor(struct work_struct *work) re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); - - if (should_notify_peers) { - if (!rtnl_trylock()) - return; - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - rtnl_unlock(); - } } static int bond_upper_dev_walk(struct net_device *upper, From 622865c73ae30f254abdf182f4b66cccbe3e0f10 Mon Sep 17 00:00:00 2001 From: LI Qingwu Date: Thu, 23 Oct 2025 03:44:22 +0000 Subject: [PATCH 0338/1075] USB: serial: option: add Telit FN920C04 ECM compositions Add support for the Telit Cinterion FN920C04 module when operating in ECM (Ethernet Control Model) mode. The following USB product IDs are used by the module when AT#USBCFG is set to 3 or 7. 0x10A3: ECM + tty (NMEA) + tty (DUN) [+ tty (DIAG)] T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a3 Rev= 5.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=76e7cb38 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10A8: ECM + tty (DUN) + tty (AUX) [+ tty (DIAG)] T: Bus=03 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a8 Rev= 5.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=76e7cb38 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Adding these IDs allows the option driver to automatically create the corresponding /dev/ttyUSB* ports under ECM mode. Tested with FN920C04 under ECM configuration (USBCFG=3 and 7). Signed-off-by: LI Qingwu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3d6ebe2692a9..5de856f65f0d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1403,10 +1403,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a2, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a3, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a7, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a8, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */ From 4c4e6ea4a120cc5ab58e437c6ba123cbfc357d45 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Thu, 23 Oct 2025 15:02:30 +0800 Subject: [PATCH 0339/1075] gpio: ljca: Fix duplicated IRQ mapping The generic_handle_domain_irq() function resolves the hardware IRQ internally. The driver performed a duplicative mapping by calling irq_find_mapping() first, which could lead to an RCU stall. Delete the redundant irq_find_mapping() call and pass the hardware IRQ directly to generic_handle_domain_irq(). Fixes: c5a4b6fd31e8 ("gpio: Add support for Intel LJCA USB GPIO driver") Signed-off-by: Haotian Zhang Link: https://lore.kernel.org/r/20251023070231.1305-1-vulab@iscas.ac.cn [Bartosz: remove unused variable] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-ljca.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 3b4f8830c741..f32d1d237795 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -286,22 +286,14 @@ static void ljca_gpio_event_cb(void *context, u8 cmd, const void *evt_data, { const struct ljca_gpio_packet *packet = evt_data; struct ljca_gpio_dev *ljca_gpio = context; - int i, irq; + int i; if (cmd != LJCA_GPIO_INT_EVENT) return; for (i = 0; i < packet->num; i++) { - irq = irq_find_mapping(ljca_gpio->gc.irq.domain, - packet->item[i].index); - if (!irq) { - dev_err(ljca_gpio->gc.parent, - "gpio_id %u does not mapped to IRQ yet\n", - packet->item[i].index); - return; - } - - generic_handle_domain_irq(ljca_gpio->gc.irq.domain, irq); + generic_handle_domain_irq(ljca_gpio->gc.irq.domain, + packet->item[i].index); set_bit(packet->item[i].index, ljca_gpio->reenable_irqs); } From cfca1637bc2b6b1e4f191d2f0b25f12402fbbb26 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:46 +0200 Subject: [PATCH 0340/1075] ASoC: Intel: avs: Unprepare a stream when XRUN occurs The pcm->prepare() function may be called multiple times in a row by the userspace, as mentioned in the documentation. The driver shall take that into account and prevent redundancy. However, the exact same function is called during XRUNs and in such case, the particular stream shall be reset and setup anew. Fixes: 9114700b496c ("ASoC: Intel: avs: Generic PCM FE operations") Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-2-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index d31058e2de5b..501466bd1f7f 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -754,6 +754,8 @@ static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_so data = snd_soc_dai_get_dma_data(dai, substream); host_stream = data->host_stream; + if (runtime->state == SNDRV_PCM_STATE_XRUN) + hdac_stream(host_stream)->prepared = false; if (hdac_stream(host_stream)->prepared) return 0; From 845f716dc5f354c719f6fda35048b6c2eca99331 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:47 +0200 Subject: [PATCH 0341/1075] ASoC: Intel: avs: Disable periods-elapsed work when closing PCM avs_dai_fe_shutdown() handles the shutdown procedure for HOST HDAudio stream while period-elapsed work services its IRQs. As the former frees the DAI's private context, these two operations shall be synchronized to avoid slab-use-after-free or worse errors. Fixes: 0dbb186c3510 ("ASoC: Intel: avs: Update stream status in a separate thread") Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-3-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/pcm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index 501466bd1f7f..80c001120cdd 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -651,6 +651,7 @@ static void avs_dai_fe_shutdown(struct snd_pcm_substream *substream, struct snd_ data = snd_soc_dai_get_dma_data(dai, substream); + disable_work_sync(&data->period_elapsed_work); snd_hdac_ext_stream_release(data->host_stream, HDAC_EXT_STREAM_TYPE_HOST); avs_dai_shutdown(substream, dai); } From 64007ad3e2a0e0a0ded8b2c6a72c0bb7883d3a33 Mon Sep 17 00:00:00 2001 From: Cezary Rojewski Date: Thu, 23 Oct 2025 11:23:48 +0200 Subject: [PATCH 0342/1075] ASoC: Intel: avs: Use snd_codec format when initializing probe The data probing is a debug feature. Currently parameters channels and rate specified by the application are read while the format is ignored. More robust approach is to read all of them. Audio format, while not used by the Probe module for PCM streaming, takes part in the gateway initialization on the DSP side. With full parametrization we gain better coverage with the data probing feature. Signed-off-by: Cezary Rojewski Link: https://patch.msgid.link/20251023092348.3119313-4-cezary.rojewski@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/avs/probes.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/avs/probes.c b/sound/soc/intel/avs/probes.c index 693ecfe68fd0..74096236984a 100644 --- a/sound/soc/intel/avs/probes.c +++ b/sound/soc/intel/avs/probes.c @@ -14,8 +14,8 @@ #include "debug.h" #include "messages.h" -static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id node_id, - size_t buffer_size) +static int avs_dsp_init_probe(struct avs_dev *adev, struct snd_compr_params *params, int bps, + union avs_connector_node_id node_id, size_t buffer_size) { struct avs_probe_cfg cfg = {{0}}; struct avs_module_entry mentry; @@ -27,12 +27,16 @@ static int avs_dsp_init_probe(struct avs_dev *adev, union avs_connector_node_id return ret; /* - * Probe module uses no cycles, audio data format and input and output - * frame sizes are unused. It is also not owned by any pipeline. + * Probe module uses no cycles, input and output frame sizes are unused. + * It is also not owned by any pipeline. */ cfg.base.ibs = 1; /* BSS module descriptor is always segment of index=2. */ cfg.base.is_pages = mentry.segments[2].flags.length; + cfg.base.audio_fmt.sampling_freq = params->codec.sample_rate; + cfg.base.audio_fmt.bit_depth = bps; + cfg.base.audio_fmt.num_channels = params->codec.ch_out; + cfg.base.audio_fmt.valid_bit_depth = bps; cfg.gtw_cfg.node_id = node_id; cfg.gtw_cfg.dma_buffer_size = buffer_size; @@ -128,8 +132,6 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, struct hdac_ext_stream *host_stream = avs_compr_get_host_stream(cstream); struct snd_compr_runtime *rtd = cstream->runtime; struct avs_dev *adev = to_avs_dev(dai->dev); - /* compr params do not store bit depth, default to S32_LE. */ - snd_pcm_format_t format = SNDRV_PCM_FORMAT_S32_LE; unsigned int format_val; int bps, ret; @@ -142,7 +144,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, ret = snd_compr_malloc_pages(cstream, rtd->buffer_size); if (ret < 0) return ret; - bps = snd_pcm_format_physical_width(format); + bps = snd_pcm_format_physical_width(params->codec.format); if (bps < 0) return bps; format_val = snd_hdac_stream_format(params->codec.ch_out, bps, params->codec.sample_rate); @@ -166,7 +168,7 @@ static int avs_probe_compr_set_params(struct snd_compr_stream *cstream, node_id.vindex = hdac_stream(host_stream)->stream_tag - 1; node_id.dma_type = AVS_DMA_HDA_HOST_INPUT; - ret = avs_dsp_init_probe(adev, node_id, rtd->dma_bytes); + ret = avs_dsp_init_probe(adev, params, bps, node_id, rtd->dma_bytes); if (ret < 0) { dev_err(dai->dev, "probe init failed: %d\n", ret); avs_dsp_enable_d0ix(adev); From d9fbe5b0bf7e2d1e20d53e4e2274f9f61bdcca98 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 23 Oct 2025 14:45:37 +0800 Subject: [PATCH 0343/1075] ASoC: fsl_sai: fix bit order for DSD format The DSD little endian format requires the msb first, because oldest bit is in msb. found this issue by testing with pipewire. Fixes: c111c2ddb3fd ("ASoC: fsl_sai: Add PDM daifmt support") Signed-off-by: Shengjiu Wang Link: https://patch.msgid.link/20251023064538.368850-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 757e7868e322..65093325a6b6 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -353,7 +353,6 @@ static int fsl_sai_set_dai_fmt_tr(struct snd_soc_dai *cpu_dai, break; case SND_SOC_DAIFMT_PDM: val_cr2 |= FSL_SAI_CR2_BCP; - val_cr4 &= ~FSL_SAI_CR4_MF; sai->is_pdm_mode = true; break; case SND_SOC_DAIFMT_RIGHT_J: @@ -638,7 +637,7 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr5 |= FSL_SAI_CR5_WNW(slot_width); val_cr5 |= FSL_SAI_CR5_W0W(slot_width); - if (sai->is_lsb_first || sai->is_pdm_mode) + if (sai->is_lsb_first) val_cr5 |= FSL_SAI_CR5_FBT(0); else val_cr5 |= FSL_SAI_CR5_FBT(word_width - 1); From ba3a5e1aeaa01ea67067d725710a839114214fc6 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 23 Oct 2025 14:45:38 +0800 Subject: [PATCH 0344/1075] ASoC: fsl_micfil: correct the endian format for DSD The DSD format supported by micfil is that oldest bit is in bit 31, so the format should be DSD little endian format. Fixes: 21aa330fec31 ("ASoC: fsl_micfil: Add decimation filter bypass mode support") Signed-off-by: Shengjiu Wang Reviewed-by: Daniel Baluta Link: https://patch.msgid.link/20251023064538.368850-3-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_micfil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index aabd90a8b3ec..cac26ba0aa4b 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -131,7 +131,7 @@ static struct fsl_micfil_soc_data fsl_micfil_imx943 = { .fifos = 8, .fifo_depth = 32, .dataline = 0xf, - .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_BE, + .formats = SNDRV_PCM_FMTBIT_S32_LE | SNDRV_PCM_FMTBIT_DSD_U32_LE, .use_edma = true, .use_verid = true, .volume_sx = false, @@ -823,7 +823,7 @@ static int fsl_micfil_hw_params(struct snd_pcm_substream *substream, break; } - if (format == SNDRV_PCM_FORMAT_DSD_U32_BE) { + if (format == SNDRV_PCM_FORMAT_DSD_U32_LE) { micfil->dec_bypass = true; /* * According to equation 29 in RM: From c5efc6a0b3940381d67887302ddb87a5cf623685 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Thu, 23 Oct 2025 04:55:24 -0700 Subject: [PATCH 0345/1075] io_uring: correct __must_hold annotation in io_install_fixed_file The __must_hold annotation references &req->ctx->uring_lock, but req is not in scope in io_install_fixed_file. This change updates the annotation to reference the correct ctx->uring_lock. improving code clarity. Fixes: f110ed8498af ("io_uring: split out fixed file installation and removal") Signed-off-by: Alok Tiwari Signed-off-by: Jens Axboe --- io_uring/filetable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145a..794ef95df293 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table) static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, u32 slot_index) - __must_hold(&req->ctx->uring_lock) + __must_hold(&ctx->uring_lock) { struct io_rsrc_node *node; From f6ceec6434b5efff62cecbaa2ff74fc29b96c0c6 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:40 +0200 Subject: [PATCH 0346/1075] net: datagram: introduce datagram_poll_queue for custom receive queues Some protocols using TCP encapsulation (e.g., espintcp, openvpn) deliver userspace-bound packets through a custom skb queue rather than the standard sk_receive_queue. Introduce datagram_poll_queue that accepts an explicit receive queue, and convert datagram_poll into a wrapper around datagram_poll_queue. This allows protocols with custom skb queues to reuse the core polling logic without relying on sk_receive_queue. Cc: Sabrina Dubroca Cc: Antonio Quartulli Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Reviewed-by: Antonio Quartulli Link: https://patch.msgid.link/20251021100942.195010-2-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 3 +++ net/core/datagram.c | 44 ++++++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fb3fec9affaa..a7cc3d1f4fd1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4204,6 +4204,9 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + struct poll_table_struct *wait, + struct sk_buff_head *rcv_queue); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index cb4b9ef2e4e3..c285c6465923 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -920,21 +920,22 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** - * datagram_poll - generic datagram poll + * datagram_poll_queue - same as datagram_poll, but on a specific receive + * queue * @file: file struct * @sock: socket * @wait: poll table + * @rcv_queue: receive queue to poll * - * Datagram poll: Again totally generic. This also handles - * sequenced packet sockets providing the socket receive queue - * is only ever holding data ready to receive. + * Performs polling on the given receive queue, handling shutdown, error, + * and connection state. This is useful for protocols that deliver + * userspace-bound packets through a custom queue instead of + * sk->sk_receive_queue. * - * Note: when you *don't* use this routine for this protocol, - * and you use a different write policy from sock_writeable() - * then please supply your own write_space callback. + * Return: poll bitmask indicating the socket's current state */ -__poll_t datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) +__poll_t datagram_poll_queue(struct file *file, struct socket *sock, + poll_table *wait, struct sk_buff_head *rcv_queue) { struct sock *sk = sock->sk; __poll_t mask; @@ -956,7 +957,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, mask |= EPOLLHUP; /* readable? */ - if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(rcv_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -978,4 +979,27 @@ __poll_t datagram_poll(struct file *file, struct socket *sock, return mask; } +EXPORT_SYMBOL(datagram_poll_queue); + +/** + * datagram_poll - generic datagram poll + * @file: file struct + * @sock: socket + * @wait: poll table + * + * Datagram poll: Again totally generic. This also handles + * sequenced packet sockets providing the socket receive queue + * is only ever holding data ready to receive. + * + * Note: when you *don't* use this routine for this protocol, + * and you use a different write policy from sock_writeable() + * then please supply your own write_space callback. + * + * Return: poll bitmask indicating the socket's current state + */ +__poll_t datagram_poll(struct file *file, struct socket *sock, poll_table *wait) +{ + return datagram_poll_queue(file, sock, wait, + &sock->sk->sk_receive_queue); +} EXPORT_SYMBOL(datagram_poll); From 0fc3e32c2c069f541f2724d91f5e98480b640326 Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:41 +0200 Subject: [PATCH 0347/1075] espintcp: use datagram_poll_queue for socket readiness espintcp uses a custom queue (ike_queue) to deliver packets to userspace. The polling logic relies on datagram_poll, which checks sk_receive_queue, which can lead to false readiness signals when that queue contains non-userspace packets. Switch espintcp_poll to use datagram_poll_queue with ike_queue, ensuring poll only signals readiness when userspace data is actually available. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251021100942.195010-3-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- net/xfrm/espintcp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index fc7a603b04f1..bf744ac9d5a7 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -555,14 +555,10 @@ static void espintcp_close(struct sock *sk, long timeout) static __poll_t espintcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct espintcp_ctx *ctx = espintcp_getctx(sk); - if (!skb_queue_empty(&ctx->ike_queue)) - mask |= EPOLLIN | EPOLLRDNORM; - - return mask; + return datagram_poll_queue(file, sock, wait, &ctx->ike_queue); } static void build_protos(struct proto *espintcp_prot, From efd729408bc7d57e0c8d027b9ff514187fc1a05b Mon Sep 17 00:00:00 2001 From: Ralf Lici Date: Tue, 21 Oct 2025 12:09:42 +0200 Subject: [PATCH 0348/1075] ovpn: use datagram_poll_queue for socket readiness in TCP openvpn TCP encapsulation uses a custom queue to deliver packets to userspace. Currently it relies on datagram_poll, which checks sk_receive_queue, leading to false readiness signals when that queue contains non-userspace packets. Switch ovpn_tcp_poll to use datagram_poll_queue with the peer's user_queue, ensuring poll only signals readiness when userspace data is actually available. Also refactor ovpn_tcp_poll in order to enforce the assumption we can make on the lifetime of ovpn_sock and peer. Fixes: 11851cbd60ea ("ovpn: implement TCP transport") Signed-off-by: Antonio Quartulli Signed-off-by: Ralf Lici Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251021100942.195010-4-ralf@mandelbit.com Signed-off-by: Paolo Abeni --- drivers/net/ovpn/tcp.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c index 289f62c5d2c7..0d7f30360d87 100644 --- a/drivers/net/ovpn/tcp.c +++ b/drivers/net/ovpn/tcp.c @@ -560,16 +560,34 @@ static void ovpn_tcp_close(struct sock *sk, long timeout) static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll(file, sock, wait); + struct sk_buff_head *queue = &sock->sk->sk_receive_queue; struct ovpn_socket *ovpn_sock; + struct ovpn_peer *peer = NULL; + __poll_t mask; rcu_read_lock(); ovpn_sock = rcu_dereference_sk_user_data(sock->sk); - if (ovpn_sock && ovpn_sock->peer && - !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue)) - mask |= EPOLLIN | EPOLLRDNORM; + /* if we landed in this callback, we expect to have a + * meaningful state. The ovpn_socket lifecycle would + * prevent it otherwise. + */ + if (WARN(!ovpn_sock || !ovpn_sock->peer, + "ovpn: null state in ovpn_tcp_poll!")) { + rcu_read_unlock(); + return 0; + } + + if (ovpn_peer_hold(ovpn_sock->peer)) { + peer = ovpn_sock->peer; + queue = &peer->tcp.user_queue; + } rcu_read_unlock(); + mask = datagram_poll_queue(file, sock, wait, queue); + + if (peer) + ovpn_peer_put(peer); + return mask; } From f7c877e7535260cc7a21484c994e8ce7e8cb6780 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 21 Oct 2025 14:17:18 +0200 Subject: [PATCH 0349/1075] vsock: fix lock inversion in vsock_assign_transport() Syzbot reported a potential lock inversion deadlock between vsock_register_mutex and sk_lock-AF_VSOCK when vsock_linger() is called. The issue was introduced by commit 687aa0c5581b ("vsock: Fix transport_* TOCTOU") which added vsock_register_mutex locking in vsock_assign_transport() around the transport->release() call, that can call vsock_linger(). vsock_assign_transport() can be called with sk_lock held. vsock_linger() calls sk_wait_event() that temporarily releases and re-acquires sk_lock. During this window, if another thread hold vsock_register_mutex while trying to acquire sk_lock, a circular dependency is created. Fix this by releasing vsock_register_mutex before calling transport->release() and vsock_deassign_transport(). This is safe because we don't need to hold vsock_register_mutex while releasing the old transport, and we ensure the new transport won't disappear by obtaining a module reference first via try_module_get(). Reported-by: syzbot+10e35716f8e4929681fa@syzkaller.appspotmail.com Tested-by: syzbot+10e35716f8e4929681fa@syzkaller.appspotmail.com Fixes: 687aa0c5581b ("vsock: Fix transport_* TOCTOU") Cc: mhal@rbox.co Cc: stable@vger.kernel.org Signed-off-by: Stefano Garzarella Link: https://patch.msgid.link/20251021121718.137668-1-sgarzare@redhat.com Signed-off-by: Paolo Abeni --- net/vmw_vsock/af_vsock.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4c2db6cca557..76763247a377 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -487,12 +487,26 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) goto err; } - if (vsk->transport) { - if (vsk->transport == new_transport) { - ret = 0; - goto err; - } + if (vsk->transport && vsk->transport == new_transport) { + ret = 0; + goto err; + } + /* We increase the module refcnt to prevent the transport unloading + * while there are open sockets assigned to it. + */ + if (!new_transport || !try_module_get(new_transport->module)) { + ret = -ENODEV; + goto err; + } + + /* It's safe to release the mutex after a successful try_module_get(). + * Whichever transport `new_transport` points at, it won't go away until + * the last module_put() below or in vsock_deassign_transport(). + */ + mutex_unlock(&vsock_register_mutex); + + if (vsk->transport) { /* transport->release() must be called with sock lock acquired. * This path can only be taken during vsock_connect(), where we * have already held the sock lock. In the other cases, this @@ -512,20 +526,6 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) vsk->peer_shutdown = 0; } - /* We increase the module refcnt to prevent the transport unloading - * while there are open sockets assigned to it. - */ - if (!new_transport || !try_module_get(new_transport->module)) { - ret = -ENODEV; - goto err; - } - - /* It's safe to release the mutex after a successful try_module_get(). - * Whichever transport `new_transport` points at, it won't go away until - * the last module_put() below or in vsock_deassign_transport(). - */ - mutex_unlock(&vsock_register_mutex); - if (sk->sk_type == SOCK_SEQPACKET) { if (!new_transport->seqpacket_allow || !new_transport->seqpacket_allow(remote_cid)) { From 0fd20f65df6aa430454a0deed8f43efa91c54835 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Thu, 16 Oct 2025 11:27:03 +0200 Subject: [PATCH 0350/1075] s390/pci: Avoid deadlock between PCI error recovery and mlx5 crdump Do not block PCI config accesses through pci_cfg_access_lock() when executing the s390 variant of PCI error recovery: Acquire just device_lock() instead of pci_dev_lock() as powerpc's EEH and generig PCI AER processing do. During error recovery testing a pair of tasks was reported to be hung: mlx5_core 0000:00:00.1: mlx5_health_try_recover:338:(pid 5553): health recovery flow aborted, PCI reads still not working INFO: task kmcheck:72 blocked for more than 122 seconds. Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kmcheck state:D stack:0 pid:72 tgid:72 ppid:2 flags:0x00000000 Call Trace: [<000000065256f030>] __schedule+0x2a0/0x590 [<000000065256f356>] schedule+0x36/0xe0 [<000000065256f572>] schedule_preempt_disabled+0x22/0x30 [<0000000652570a94>] __mutex_lock.constprop.0+0x484/0x8a8 [<000003ff800673a4>] mlx5_unload_one+0x34/0x58 [mlx5_core] [<000003ff8006745c>] mlx5_pci_err_detected+0x94/0x140 [mlx5_core] [<0000000652556c5a>] zpci_event_attempt_error_recovery+0xf2/0x398 [<0000000651b9184a>] __zpci_event_error+0x23a/0x2c0 INFO: task kworker/u1664:6:1514 blocked for more than 122 seconds. Not tainted 5.14.0-570.12.1.bringup7.el9.s390x #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:kworker/u1664:6 state:D stack:0 pid:1514 tgid:1514 ppid:2 flags:0x00000000 Workqueue: mlx5_health0000:00:00.0 mlx5_fw_fatal_reporter_err_work [mlx5_core] Call Trace: [<000000065256f030>] __schedule+0x2a0/0x590 [<000000065256f356>] schedule+0x36/0xe0 [<0000000652172e28>] pci_wait_cfg+0x80/0xe8 [<0000000652172f94>] pci_cfg_access_lock+0x74/0x88 [<000003ff800916b6>] mlx5_vsc_gw_lock+0x36/0x178 [mlx5_core] [<000003ff80098824>] mlx5_crdump_collect+0x34/0x1c8 [mlx5_core] [<000003ff80074b62>] mlx5_fw_fatal_reporter_dump+0x6a/0xe8 [mlx5_core] [<0000000652512242>] devlink_health_do_dump.part.0+0x82/0x168 [<0000000652513212>] devlink_health_report+0x19a/0x230 [<000003ff80075a12>] mlx5_fw_fatal_reporter_err_work+0xba/0x1b0 [mlx5_core] No kernel log of the exact same error with an upstream kernel is available - but the very same deadlock situation can be constructed there, too: - task: kmcheck mlx5_unload_one() tries to acquire devlink lock while the PCI error recovery code has set pdev->block_cfg_access by way of pci_cfg_access_lock() - task: kworker mlx5_crdump_collect() tries to set block_cfg_access through pci_cfg_access_lock() while devlink_health_report() had acquired the devlink lock. A similar deadlock situation can be reproduced by requesting a crdump with > devlink health dump show pci/ reporter fw_fatal while PCI error recovery is executed on the same physical function by mlx5_core's pci_error_handlers. On s390 this can be injected with > zpcictl --reset-fw Tests with this patch failed to reproduce that second deadlock situation, the devlink command is rejected with "kernel answers: Permission denied" - and we get a kernel log message of: mlx5_core 1ed0:00:00.1: mlx5_crdump_collect:50:(pid 254382): crdump: failed to lock vsc gw err -5 because the config read of VSC_SEMAPHORE is rejected by the underlying hardware. Two prior attempts to address this issue have been discussed and ultimately rejected [see link], with the primary argument that s390's implementation of PCI error recovery is imposing restrictions that neither powerpc's EEH nor PCI AER handling need. Tests show that PCI error recovery on s390 is running to completion even without blocking access to PCI config space. Link: https://lore.kernel.org/all/20251007144826.2825134-1-gbayer@linux.ibm.com/ Cc: stable@vger.kernel.org Fixes: 4cdf2f4e24ff ("s390/pci: implement minimal PCI error recovery") Reviewed-by: Niklas Schnelle Signed-off-by: Gerd Bayer Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b95376041501..27db1e72c623 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -188,7 +188,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) * is unbound or probed and that userspace can't access its * configuration space while we perform recovery. */ - pci_dev_lock(pdev); + device_lock(&pdev->dev); if (pdev->error_state == pci_channel_io_perm_failure) { ers_res = PCI_ERS_RESULT_DISCONNECT; goto out_unlock; @@ -257,7 +257,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED); out_unlock: - pci_dev_unlock(pdev); + device_unlock(&pdev->dev); zpci_report_status(zdev, "recovery", status_str); return ers_res; From 399d10934740ae8cdaa4e3245f7c5f6c332da844 Mon Sep 17 00:00:00 2001 From: Robert Marko Date: Tue, 21 Oct 2025 15:20:26 +0200 Subject: [PATCH 0351/1075] net: phy: micrel: always set shared->phydev for LAN8814 Currently, during the LAN8814 PTP probe shared->phydev is only set if PTP clock gets actually set, otherwise the function will return before setting it. This is an issue as shared->phydev is unconditionally being used when IRQ is being handled, especially in lan8814_gpio_process_cap and since it was not set it will cause a NULL pointer exception and crash the kernel. So, simply always set shared->phydev to avoid the NULL pointer exception. Fixes: b3f1a08fcf0d ("net: phy: micrel: Add support for PTP_PF_EXTTS for lan8814") Signed-off-by: Robert Marko Tested-by: Horatiu Vultur Link: https://patch.msgid.link/20251021132034.983936-1-robert.marko@sartura.hr Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 79ce3eb6752b..604b5de0c158 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4262,6 +4262,8 @@ static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, { struct lan8814_shared_priv *shared = phy_package_get_priv(phydev); + shared->phydev = phydev; + /* Initialise shared lock for clock*/ mutex_init(&shared->shared_lock); @@ -4317,8 +4319,6 @@ static int __lan8814_ptp_probe_once(struct phy_device *phydev, char *pin_name, phydev_dbg(phydev, "successfully registered ptp clock\n"); - shared->phydev = phydev; - /* The EP.4 is shared between all the PHYs in the package and also it * can be accessed by any of the PHYs */ From b2284768c6b32aa224ca7d0ef0741beb434f03aa Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 22 Oct 2025 11:44:21 +0800 Subject: [PATCH 0352/1075] virtio-net: zero unused hash fields When GSO tunnel is negotiated virtio_net_hdr_tnl_from_skb() tries to initialize the tunnel metadata but forget to zero unused rxhash fields. This may leak information to another side. Fixing this by zeroing the unused hash fields. Acked-by: Michael S. Tsirkin Fixes: a2fb4bc4e2a6a ("net: implement virtio helpers to handle UDP GSO tunneling") Cc: Signed-off-by: Jason Wang Reviewed-by: Xuan Zhuo Link: https://patch.msgid.link/20251022034421.70244-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/virtio_net.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 20e0584db1dd..4d1780848d0e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,6 +401,10 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; + vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_report = 0; + vhdr->hash_hdr.padding = 0; + /* Let the basic parsing deal with plain GSO features. */ skb_shinfo(skb)->gso_type &= ~tnl_gso_type; ret = virtio_net_hdr_from_skb(skb, hdr, true, false, vlan_hlen); From bb65e0c141f879cdf54db11ae446ee3605fb54d5 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 22 Oct 2025 15:29:39 +0300 Subject: [PATCH 0353/1075] net/mlx5: Add PPHCR to PCAM supported registers mask Add the PPHCR bit to the port_access_reg_cap_mask field of PCAM register to indicate that the device supports the PPHCR register and the RS-FEC histogram feature. Signed-off-by: Alexei Lazar Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 07614cd95bed..1b0b36aa2a76 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -10833,7 +10833,9 @@ struct mlx5_ifc_pcam_regs_5000_to_507f_bits { u8 port_access_reg_cap_mask_127_to_96[0x20]; u8 port_access_reg_cap_mask_95_to_64[0x20]; - u8 port_access_reg_cap_mask_63_to_36[0x1c]; + u8 port_access_reg_cap_mask_63[0x1]; + u8 pphcr[0x1]; + u8 port_access_reg_cap_mask_61_to_36[0x1a]; u8 pplm[0x1]; u8 port_access_reg_cap_mask_34_to_32[0x3]; From d58a9a917aa39b10250ec16cb9c24e942cbc77d3 Mon Sep 17 00:00:00 2001 From: Alexei Lazar Date: Wed, 22 Oct 2025 15:29:40 +0300 Subject: [PATCH 0354/1075] net/mlx5e: Skip PPHCR register query if not supported by the device Check the PCAM supported registers mask before querying the PPHCR register, as it is not supported in older devices. Fixes: 44907e7c8fd0 ("net/mlx5e: Add logic to read RS-FEC histogram bin ranges from PPHCR") Signed-off-by: Alexei Lazar Reviewed-by: Yael Chemla Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 7c029a7d0fd7..a2802cfc9b98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1614,7 +1614,9 @@ void mlx5e_stats_fec_get(struct mlx5e_priv *priv, fec_set_corrected_bits_total(priv, fec_stats); fec_set_block_stats(priv, mode, fec_stats); - fec_set_histograms_stats(priv, mode, hist); + + if (MLX5_CAP_PCAM_REG(priv->mdev, pphcr)) + fec_set_histograms_stats(priv, mode, hist); } #define PPORT_ETH_EXT_OFF(c) \ From 8f82f89550daafc8ca3ba74c389ae1b4afdd75c8 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 22 Oct 2025 15:29:41 +0300 Subject: [PATCH 0355/1075] net/mlx5: Refactor devcom to return NULL on failure Devcom device and component registration isn't always critical to the functionality of the caller, hence the registration can fail and we can continue working with an ERR_PTR value saved inside a variable. In order to avoid that make sure all devcom failures return NULL. Signed-off-by: Patrisious Haddad Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 +-- .../mellanox/mlx5/core/eswitch_offloads.c | 4 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 7 ++- .../ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- .../ethernet/mellanox/mlx5/core/lib/devcom.c | 53 +++++++++---------- .../net/ethernet/mellanox/mlx5/core/lib/sd.c | 4 +- .../net/ethernet/mellanox/mlx5/core/main.c | 5 +- 7 files changed, 39 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a56825921c23..41fd5eee6306 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -242,8 +242,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) &attr, mlx5e_devcom_event_mpv, priv); - if (IS_ERR(priv->devcom)) - return PTR_ERR(priv->devcom); + if (!priv->devcom) + return -EINVAL; if (mlx5_core_is_mp_master(priv->mdev)) { mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP, @@ -256,7 +256,7 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->devcom)) + if (!priv->devcom) return; if (mlx5_core_is_mp_master(priv->mdev)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 4cf995be127d..34749814f19b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -3129,7 +3129,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, attr, mlx5_esw_offloads_devcom_event, esw); - if (IS_ERR(esw->devcom)) + if (!esw->devcom) return; mlx5_devcom_send_event(esw->devcom, @@ -3140,7 +3140,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) { - if (IS_ERR_OR_NULL(esw->devcom)) + if (!esw->devcom) return; mlx5_devcom_send_event(esw->devcom, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 59c00c911275..3db0387bf6dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1430,11 +1430,10 @@ static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, &attr, NULL, dev); - if (IS_ERR(dev->priv.hca_devcom_comp)) { + if (!dev->priv.hca_devcom_comp) { mlx5_core_err(dev, - "Failed to register devcom HCA component, err: %ld\n", - PTR_ERR(dev->priv.hca_devcom_comp)); - return PTR_ERR(dev->priv.hca_devcom_comp); + "Failed to register devcom HCA component."); + return -EINVAL; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d0ba83d77cd1..29e7fa09c32c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -1444,7 +1444,7 @@ static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) compd = mlx5_devcom_register_component(mdev->priv.devc, MLX5_DEVCOM_SHARED_CLOCK, &attr, NULL, mdev); - if (IS_ERR(compd)) + if (!compd) return; mdev->clock_state->compdev = compd; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index faa2833602c8..e749618229bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -76,20 +76,18 @@ mlx5_devcom_dev_alloc(struct mlx5_core_dev *dev) struct mlx5_devcom_dev * mlx5_devcom_register_device(struct mlx5_core_dev *dev) { - struct mlx5_devcom_dev *devc; + struct mlx5_devcom_dev *devc = NULL; mutex_lock(&dev_list_lock); if (devcom_dev_exists(dev)) { - devc = ERR_PTR(-EEXIST); + mlx5_core_err(dev, "devcom device already exists"); goto out; } devc = mlx5_devcom_dev_alloc(dev); - if (!devc) { - devc = ERR_PTR(-ENOMEM); + if (!devc) goto out; - } list_add_tail(&devc->list, &devcom_dev_list); out: @@ -110,8 +108,10 @@ mlx5_devcom_dev_release(struct kref *ref) void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc) { - if (!IS_ERR_OR_NULL(devc)) - kref_put(&devc->ref, mlx5_devcom_dev_release); + if (!devc) + return; + + kref_put(&devc->ref, mlx5_devcom_dev_release); } static struct mlx5_devcom_comp * @@ -122,7 +122,7 @@ mlx5_devcom_comp_alloc(u64 id, const struct mlx5_devcom_match_attr *attr, comp = kzalloc(sizeof(*comp), GFP_KERNEL); if (!comp) - return ERR_PTR(-ENOMEM); + return NULL; comp->id = id; comp->key.key = attr->key; @@ -160,7 +160,7 @@ devcom_alloc_comp_dev(struct mlx5_devcom_dev *devc, devcom = kzalloc(sizeof(*devcom), GFP_KERNEL); if (!devcom) - return ERR_PTR(-ENOMEM); + return NULL; kref_get(&devc->ref); devcom->devc = devc; @@ -240,31 +240,28 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, mlx5_devcom_event_handler_t handler, void *data) { - struct mlx5_devcom_comp_dev *devcom; + struct mlx5_devcom_comp_dev *devcom = NULL; struct mlx5_devcom_comp *comp; - if (IS_ERR_OR_NULL(devc)) - return ERR_PTR(-EINVAL); + if (!devc) + return NULL; mutex_lock(&comp_list_lock); comp = devcom_component_get(devc, id, attr, handler); - if (IS_ERR(comp)) { - devcom = ERR_PTR(-EINVAL); + if (IS_ERR(comp)) goto out_unlock; - } if (!comp) { comp = mlx5_devcom_comp_alloc(id, attr, handler); - if (IS_ERR(comp)) { - devcom = ERR_CAST(comp); + if (!comp) goto out_unlock; - } + list_add_tail(&comp->comp_list, &devcom_comp_list); } mutex_unlock(&comp_list_lock); devcom = devcom_alloc_comp_dev(devc, comp, data); - if (IS_ERR(devcom)) + if (!devcom) kref_put(&comp->ref, mlx5_devcom_comp_release); return devcom; @@ -276,8 +273,10 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom) { - if (!IS_ERR_OR_NULL(devcom)) - devcom_free_comp_dev(devcom); + if (!devcom) + return; + + devcom_free_comp_dev(devcom); } int mlx5_devcom_comp_get_size(struct mlx5_devcom_comp_dev *devcom) @@ -296,7 +295,7 @@ int mlx5_devcom_send_event(struct mlx5_devcom_comp_dev *devcom, int err = 0; void *data; - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return -ENODEV; comp = devcom->comp; @@ -338,7 +337,7 @@ void mlx5_devcom_comp_set_ready(struct mlx5_devcom_comp_dev *devcom, bool ready) bool mlx5_devcom_comp_is_ready(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return false; return READ_ONCE(devcom->comp->ready); @@ -348,7 +347,7 @@ bool mlx5_devcom_for_each_peer_begin(struct mlx5_devcom_comp_dev *devcom) { struct mlx5_devcom_comp *comp; - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return false; comp = devcom->comp; @@ -421,21 +420,21 @@ void *mlx5_devcom_get_next_peer_data_rcu(struct mlx5_devcom_comp_dev *devcom, void mlx5_devcom_comp_lock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return; down_write(&devcom->comp->sem); } void mlx5_devcom_comp_unlock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return; up_write(&devcom->comp->sem); } int mlx5_devcom_comp_trylock(struct mlx5_devcom_comp_dev *devcom) { - if (IS_ERR_OR_NULL(devcom)) + if (!devcom) return 0; return down_write_trylock(&devcom->comp->sem); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f5c2701f6e87..8e17daae48af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -221,8 +221,8 @@ static int sd_register(struct mlx5_core_dev *dev) attr.net = mlx5_core_net(dev); devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP, &attr, NULL, dev); - if (IS_ERR(devcom)) - return PTR_ERR(devcom); + if (!devcom) + return -EINVAL; sd->devcom = devcom; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df93625c9dfa..70c156591b0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -978,9 +978,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) int err; dev->priv.devc = mlx5_devcom_register_device(dev); - if (IS_ERR(dev->priv.devc)) - mlx5_core_warn(dev, "failed to register devcom device %pe\n", - dev->priv.devc); + if (!dev->priv.devc) + mlx5_core_warn(dev, "failed to register devcom device\n"); err = mlx5_query_board_id(dev); if (err) { From 664f76be38a18c61151d0ef248c7e2f3afb4f3c7 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 22 Oct 2025 15:29:42 +0300 Subject: [PATCH 0356/1075] net/mlx5: Fix IPsec cleanup over MPV device When we do mlx5e_detach_netdev() we eventually disable blocking events notifier, among those events are IPsec MPV events from IB to core. So before disabling those blocking events, make sure to also unregister the devcom device and mark all this device operations as complete, in order to prevent the other device from using invalid netdev during future devcom events which could cause the trace below. BUG: kernel NULL pointer dereference, address: 0000000000000010 PGD 146427067 P4D 146427067 PUD 146488067 PMD 0 Oops: Oops: 0000 [#1] SMP CPU: 1 UID: 0 PID: 7735 Comm: devlink Tainted: GW 6.12.0-rc6_for_upstream_min_debug_2024_11_08_00_46 #1 Tainted: [W]=WARN Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:mlx5_devcom_comp_set_ready+0x5/0x40 [mlx5_core] Code: 00 01 48 83 05 23 32 1e 00 01 41 b8 ed ff ff ff e9 60 ff ff ff 48 83 05 00 32 1e 00 01 eb e3 66 0f 1f 44 00 00 0f 1f 44 00 00 <48> 8b 47 10 48 83 05 5f 32 1e 00 01 48 8b 50 40 48 85 d2 74 05 40 RSP: 0018:ffff88811a5c35f8 EFLAGS: 00010206 RAX: ffff888106e8ab80 RBX: ffff888107d7e200 RCX: ffff88810d6f0a00 RDX: ffff88810d6f0a00 RSI: 0000000000000001 RDI: 0000000000000000 RBP: ffff88811a17e620 R08: 0000000000000040 R09: 0000000000000000 R10: ffff88811a5c3618 R11: 0000000de85d51bd R12: ffff88811a17e600 R13: ffff88810d6f0a00 R14: 0000000000000000 R15: ffff8881034bda80 FS: 00007f27bdf89180(0000) GS:ffff88852c880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 000000010f159005 CR4: 0000000000372eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __die+0x20/0x60 ? page_fault_oops+0x150/0x3e0 ? exc_page_fault+0x74/0x130 ? asm_exc_page_fault+0x22/0x30 ? mlx5_devcom_comp_set_ready+0x5/0x40 [mlx5_core] mlx5e_devcom_event_mpv+0x42/0x60 [mlx5_core] mlx5_devcom_send_event+0x8c/0x170 [mlx5_core] blocking_event+0x17b/0x230 [mlx5_core] notifier_call_chain+0x35/0xa0 blocking_notifier_call_chain+0x3d/0x60 mlx5_blocking_notifier_call_chain+0x22/0x30 [mlx5_core] mlx5_core_mp_event_replay+0x12/0x20 [mlx5_core] mlx5_ib_bind_slave_port+0x228/0x2c0 [mlx5_ib] mlx5_ib_stage_init_init+0x664/0x9d0 [mlx5_ib] ? idr_alloc_cyclic+0x50/0xb0 ? __kmalloc_cache_noprof+0x167/0x340 ? __kmalloc_noprof+0x1a7/0x430 __mlx5_ib_add+0x34/0xd0 [mlx5_ib] mlx5r_probe+0xe9/0x310 [mlx5_ib] ? kernfs_add_one+0x107/0x150 ? __mlx5_ib_add+0xd0/0xd0 [mlx5_ib] auxiliary_bus_probe+0x3e/0x90 really_probe+0xc5/0x3a0 ? driver_probe_device+0x90/0x90 __driver_probe_device+0x80/0x160 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 bus_for_each_drv+0x80/0xd0 __device_attach+0xbc/0x1f0 bus_probe_device+0x86/0xa0 device_add+0x62d/0x830 __auxiliary_device_add+0x3b/0xa0 ? auxiliary_device_init+0x41/0x90 add_adev+0xd1/0x150 [mlx5_core] mlx5_rescan_drivers_locked+0x21c/0x300 [mlx5_core] esw_mode_change+0x6c/0xc0 [mlx5_core] mlx5_devlink_eswitch_mode_set+0x21e/0x640 [mlx5_core] devlink_nl_eswitch_set_doit+0x60/0xe0 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? devlink_get_from_attrs_lock+0x170/0x170 ? devlink_nl_eswitch_get_doit+0x290/0x290 ? devlink_nl_pre_doit_port_optional+0x50/0x50 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2d0 netlink_sendmsg+0x1e4/0x410 __sock_sendmsg+0x38/0x60 ? sockfd_lookup_light+0x12/0x60 __sys_sendto+0x105/0x160 ? __sys_recvmsg+0x4e/0x90 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x4c/0x100 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7f27bc91b13a Code: bb 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 8b 05 fa 96 2c 00 45 89 c9 4c 63 d1 48 63 ff 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 76 f3 c3 0f 1f 40 00 41 55 41 54 4d 89 c5 55 RSP: 002b:00007fff369557e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000009c54b10 RCX: 00007f27bc91b13a RDX: 0000000000000038 RSI: 0000000009c54b10 RDI: 0000000000000006 RBP: 0000000009c54920 R08: 00007f27bd0030e0 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 Modules linked in: mlx5_vdpa vringh vhost_iotlb vdpa xt_MASQUERADE nf_conntrack_netlink nfnetlink iptable_nat xt_addrtype xt_conntrack nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser libiscsi ib_umad scsi_transport_iscsi ib_ipoib rdma_cm iw_cm ib_cm mlx5_fwctl mlx5_ib ib_uverbs ib_core mlx5_core CR2: 0000000000000010 Fixes: 82f9378c443c ("net/mlx5: Handle IPsec steering upon master unbind/bind") Signed-off-by: Patrisious Haddad Reviewed-by: Leon Romanovsky Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761136182-918470-5-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ipsec.h | 5 ++++ .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 25 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 5d7c15abfcaf..f8eaaf37963b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -342,6 +342,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, struct mlx5e_priv *master_priv); void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event); +void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv); static inline struct mlx5_core_dev * mlx5e_ipsec_sa2dev(struct mlx5e_ipsec_sa_entry *sa_entry) @@ -387,6 +388,10 @@ static inline void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *sl static inline void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) { } + +static inline void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) +{ +} #endif #endif /* __MLX5E_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index bf1d2769d4f1..feef86fff4bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -2893,9 +2893,30 @@ void mlx5e_ipsec_handle_mpv_event(int event, struct mlx5e_priv *slave_priv, void mlx5e_ipsec_send_event(struct mlx5e_priv *priv, int event) { - if (!priv->ipsec) - return; /* IPsec not supported */ + if (!priv->ipsec || mlx5_devcom_comp_get_size(priv->devcom) < 2) + return; /* IPsec not supported or no peers */ mlx5_devcom_send_event(priv->devcom, event, event, priv); wait_for_completion(&priv->ipsec->comp); } + +void mlx5e_ipsec_disable_events(struct mlx5e_priv *priv) +{ + struct mlx5_devcom_comp_dev *tmp = NULL; + struct mlx5e_priv *peer_priv; + + if (!priv->devcom) + return; + + if (!mlx5_devcom_for_each_peer_begin(priv->devcom)) + goto out; + + peer_priv = mlx5_devcom_get_next_peer_data(priv->devcom, &tmp); + if (peer_priv) + complete_all(&peer_priv->ipsec->comp); + + mlx5_devcom_for_each_peer_end(priv->devcom); +out: + mlx5_devcom_unregister_component(priv->devcom); + priv->devcom = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 41fd5eee6306..9c46511e7b43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -266,6 +266,7 @@ static void mlx5e_devcom_cleanup_mpv(struct mlx5e_priv *priv) } mlx5_devcom_unregister_component(priv->devcom); + priv->devcom = NULL; } static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) @@ -6120,6 +6121,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) if (mlx5e_monitor_counter_supported(priv)) mlx5e_monitor_counter_cleanup(priv); + mlx5e_ipsec_disable_events(priv); mlx5e_disable_blocking_events(priv); mlx5e_disable_async_events(priv); mlx5_lag_remove_netdev(mdev, priv->netdev); From 434f7349a1f00618a620b316f091bd13a12bc8d2 Mon Sep 17 00:00:00 2001 From: Alexey Klimov Date: Wed, 22 Oct 2025 21:10:12 +0100 Subject: [PATCH 0357/1075] regmap: slimbus: fix bus_context pointer in regmap init calls Commit 4e65bda8273c ("ASoC: wcd934x: fix error handling in wcd934x_codec_parse_data()") revealed the problem in the slimbus regmap. That commit breaks audio playback, for instance, on sdm845 Thundercomm Dragonboard 845c board: Unable to handle kernel paging request at virtual address ffff8000847cbad4 ... CPU: 5 UID: 0 PID: 776 Comm: aplay Not tainted 6.18.0-rc1-00028-g7ea30958b305 #11 PREEMPT Hardware name: Thundercomm Dragonboard 845c (DT) ... Call trace: slim_xfer_msg+0x24/0x1ac [slimbus] (P) slim_read+0x48/0x74 [slimbus] regmap_slimbus_read+0x18/0x24 [regmap_slimbus] _regmap_raw_read+0xe8/0x174 _regmap_bus_read+0x44/0x80 _regmap_read+0x60/0xd8 _regmap_update_bits+0xf4/0x140 _regmap_select_page+0xa8/0x124 _regmap_raw_write_impl+0x3b8/0x65c _regmap_bus_raw_write+0x60/0x80 _regmap_write+0x58/0xc0 regmap_write+0x4c/0x80 wcd934x_hw_params+0x494/0x8b8 [snd_soc_wcd934x] snd_soc_dai_hw_params+0x3c/0x7c [snd_soc_core] __soc_pcm_hw_params+0x22c/0x634 [snd_soc_core] dpcm_be_dai_hw_params+0x1d4/0x38c [snd_soc_core] dpcm_fe_dai_hw_params+0x9c/0x17c [snd_soc_core] snd_pcm_hw_params+0x124/0x464 [snd_pcm] snd_pcm_common_ioctl+0x110c/0x1820 [snd_pcm] snd_pcm_ioctl+0x34/0x4c [snd_pcm] __arm64_sys_ioctl+0xac/0x104 invoke_syscall+0x48/0x104 el0_svc_common.constprop.0+0x40/0xe0 do_el0_svc+0x1c/0x28 el0_svc+0x34/0xec el0t_64_sync_handler+0xa0/0xf0 el0t_64_sync+0x198/0x19c The __devm_regmap_init_slimbus() started to be used instead of __regmap_init_slimbus() after the commit mentioned above and turns out the incorrect bus_context pointer (3rd argument) was used in __devm_regmap_init_slimbus(). It should be just "slimbus" (which is equal to &slimbus->dev). Correct it. The wcd934x codec seems to be the only or the first user of devm_regmap_init_slimbus() but we should fix it till the point where __devm_regmap_init_slimbus() was introduced therefore two "Fixes" tags. While at this, also correct the same argument in __regmap_init_slimbus(). Fixes: 4e65bda8273c ("ASoC: wcd934x: fix error handling in wcd934x_codec_parse_data()") Fixes: 7d6f7fb053ad ("regmap: add SLIMbus support") Cc: stable@vger.kernel.org Cc: Dmitry Baryshkov Cc: Ma Ke Cc: Steev Klimaszewski Cc: Srinivas Kandagatla Reviewed-by: Abel Vesa Signed-off-by: Alexey Klimov Reviewed-by: Dmitry Baryshkov Link: https://patch.msgid.link/20251022201013.1740211-1-alexey.klimov@linaro.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-slimbus.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/base/regmap/regmap-slimbus.c b/drivers/base/regmap/regmap-slimbus.c index 54eb7d227cf4..e523fae73004 100644 --- a/drivers/base/regmap/regmap-slimbus.c +++ b/drivers/base/regmap/regmap-slimbus.c @@ -48,8 +48,7 @@ struct regmap *__regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __regmap_init(&slimbus->dev, bus, &slimbus->dev, config, - lock_key, lock_name); + return __regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__regmap_init_slimbus); @@ -63,8 +62,7 @@ struct regmap *__devm_regmap_init_slimbus(struct slim_device *slimbus, if (IS_ERR(bus)) return ERR_CAST(bus); - return __devm_regmap_init(&slimbus->dev, bus, &slimbus, config, - lock_key, lock_name); + return __devm_regmap_init(&slimbus->dev, bus, slimbus, config, lock_key, lock_name); } EXPORT_SYMBOL_GPL(__devm_regmap_init_slimbus); From eecd7cb64178efb35f89aa5134cf6ce36c0c66db Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Oct 2025 14:01:07 +0200 Subject: [PATCH 0358/1075] slab: fix slab accounting imbalance due to defer_deactivate_slab() Since commit af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") there's a possibility in alloc_single_from_new_slab() that we discard the newly allocated slab if we can't spin and we fail to trylock. As a result we don't perform inc_slabs_node() later in the function. Instead we perform a deferred deactivate_slab() which can either put the unacounted slab on partial list, or discard it immediately while performing dec_slabs_node(). Either way will cause an accounting imbalance. Fix this by not marking the slab as frozen, and using free_slab() instead of deactivate_slab() for non-frozen slabs in free_deferred_objects(). For CONFIG_SLUB_TINY, that's the only possible case. By not using discard_slab() we avoid dec_slabs_node(). Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Link: https://patch.msgid.link/20251023-fix-slab-accounting-v2-1-0e62d50986ea@suse.cz Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 23d8f54e9486..87a1d2f9de0d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3422,7 +3422,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab, if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) { /* Unlucky, discard newly allocated slab */ - slab->frozen = 1; defer_deactivate_slab(slab, NULL); return NULL; } @@ -6471,9 +6470,12 @@ static void free_deferred_objects(struct irq_work *work) struct slab *slab = container_of(pos, struct slab, llnode); #ifdef CONFIG_SLUB_TINY - discard_slab(slab->slab_cache, slab); + free_slab(slab->slab_cache, slab); #else - deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + if (slab->frozen) + deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + else + free_slab(slab->slab_cache, slab); #endif } } From b98c94eed4a975e0c80b7e90a649a46967376f58 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 22 Oct 2025 11:09:14 +0100 Subject: [PATCH 0359/1075] arm64: mte: Do not warn if the page is already tagged in copy_highpage() The arm64 copy_highpage() assumes that the destination page is newly allocated and not MTE-tagged (PG_mte_tagged unset) and warns accordingly. However, following commit 060913999d7a ("mm: migrate: support poisoned recover from migrate folio"), folio_mc_copy() is called before __folio_migrate_mapping(). If the latter fails (-EAGAIN), the copy will be done again to the same destination page. Since copy_highpage() already set the PG_mte_tagged flag, this second copy will warn. Replace the WARN_ON_ONCE(page already tagged) in the arm64 copy_highpage() with a comment. Reported-by: syzbot+d1974fc28545a3e6218b@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/68dda1ae.a00a0220.102ee.0065.GAE@google.com Reviewed-by: David Hildenbrand Cc: Will Deacon Cc: Kefeng Wang Cc: stable@vger.kernel.org # 6.12.x Reviewed-by: Yang Shi Signed-off-by: Catalin Marinas --- arch/arm64/mm/copypage.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index a86c897017df..cd5912ba617b 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -35,7 +35,7 @@ void copy_highpage(struct page *to, struct page *from) from != folio_page(src, 0)) return; - WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + folio_try_hugetlb_mte_tagging(dst); /* * Populate tags for all subpages. @@ -51,8 +51,13 @@ void copy_highpage(struct page *to, struct page *from) } folio_set_hugetlb_mte_tagged(dst); } else if (page_mte_tagged(from)) { - /* It's a new page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(to)); + /* + * Most of the time it's a new page that shouldn't have been + * tagged yet. However, folio migration can end up reusing the + * same page without untagging it. Ignore the warning if the + * page is already tagged. + */ + try_page_mte_tagging(to); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); From 7221b9caf84b3294688228a19273d74ea19a2ee4 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 20 Oct 2025 13:36:43 -0700 Subject: [PATCH 0360/1075] libbpf: Fix powerpc's stack register definition in bpf_tracing.h retsnoop's build on powerpc (ppc64le) architecture ([0]) failed due to wrong definition of PT_REGS_SP() macro. Looking at powerpc's implementation of stack unwinding in perf_callchain_user_64() clearly shows that stack pointer register is gpr[1]. Fix libbpf's definition of __PT_SP_REG for powerpc to fix all this. [0] https://kojipkgs.fedoraproject.org/work/tasks/1544/137921544/build.log Fixes: 138d6153a139 ("samples/bpf: Enable powerpc support") Signed-off-by: Andrii Nakryiko Reviewed-by: Naveen N Rao (AMD) Link: https://lore.kernel.org/r/20251020203643.989467-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_tracing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index a8f6cd4841b0..dbe32a5d02cd 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -311,7 +311,7 @@ struct pt_regs___arm64 { #define __PT_RET_REG regs[31] #define __PT_FP_REG __unsupported__ #define __PT_RC_REG gpr[3] -#define __PT_SP_REG sp +#define __PT_SP_REG gpr[1] #define __PT_IP_REG nip #elif defined(bpf_target_sparc) From 8f067aa59430266386b83c18b983ca583faa6a11 Mon Sep 17 00:00:00 2001 From: Yuhao Jiang Date: Wed, 22 Oct 2025 15:07:04 -0500 Subject: [PATCH 0361/1075] ACPI: video: Fix use-after-free in acpi_video_switch_brightness() The switch_brightness_work delayed work accesses device->brightness and device->backlight, freed by acpi_video_dev_unregister_backlight() during device removal. If the work executes after acpi_video_bus_unregister_backlight() frees these resources, it causes a use-after-free when acpi_video_switch_brightness() dereferences device->brightness or device->backlight. Fix this by calling cancel_delayed_work_sync() for each device's switch_brightness_work in acpi_video_bus_remove_notify_handler() after removing the notify handler that queues the work. This ensures the work completes before the memory is freed. Fixes: 8ab58e8e7e097 ("ACPI / video: Fix backlight taking 2 steps on a brightness up/down keypress") Cc: All applicable Signed-off-by: Yuhao Jiang Reviewed-by: Hans de Goede [ rjw: Changelog edit ] Link: https://patch.msgid.link/20251022200704.2655507-1-danisjiang@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_video.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 103f29661576..be8e7e18abca 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -1959,8 +1959,10 @@ static void acpi_video_bus_remove_notify_handler(struct acpi_video_bus *video) struct acpi_video_device *dev; mutex_lock(&video->device_list_lock); - list_for_each_entry(dev, &video->video_device_list, entry) + list_for_each_entry(dev, &video->video_device_list, entry) { acpi_video_dev_remove_notify_handler(dev); + cancel_delayed_work_sync(&dev->switch_brightness_work); + } mutex_unlock(&video->device_list_lock); acpi_video_bus_stop_devices(video); From dfab67879bed6800cc512e320167b969ebc6d5a9 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Oct 2025 23:56:30 +0100 Subject: [PATCH 0362/1075] MAINTAINERS: Update Kconfig section Masahiro Yamada stepped down as Kbuild and Kconfig maintainer in commit 8d6841d5cb20 ("MAINTAINERS: hand over Kbuild maintenance"), leaving Kconfig officially orphaned and handing Kbuild over to Nicolas and myself. Since then, there have been a few simple patches to Kconfig that have ended up on the linux-kbuild mailing list without clear direction on who will take them, as they are not really sent to anybody officially, although the list is obviously watched by the Kbuild maintainers. Make Nicolas and I official maintainers of Kconfig in "Odd Fixes" status, similar to Kbuild, so that the subsystem has clear points of contact for contributors, even if significant contributions may not be accepted. Additionally, add the Kbuild tree to this section. Acked-by: Nicolas Schier Acked-by: Randy Dunlap Link: https://patch.msgid.link/20251023-update-kconfig-maintainers-v1-1-0ebd5b4ecced@kernel.org Signed-off-by: Nathan Chancellor --- MAINTAINERS | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..af0470c31c11 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13415,9 +13415,12 @@ F: mm/kasan/ F: scripts/Makefile.kasan KCONFIG +M: Nathan Chancellor +M: Nicolas Schier L: linux-kbuild@vger.kernel.org -S: Orphan +S: Odd Fixes Q: https://patchwork.kernel.org/project/linux-kbuild/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kbuild/linux.git F: Documentation/kbuild/kconfig* F: scripts/Kconfig.include F: scripts/kconfig/ From 79a6f2da168543c0431ade57428f673c19c5b72f Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Tue, 21 Oct 2025 01:04:40 +0800 Subject: [PATCH 0363/1075] ASoC: mediatek: Fix double pm_runtime_disable in remove functions Both mt8195-afe-pcm and mt8365-afe-pcm drivers use devm_pm_runtime_enable() in probe function, which automatically calls pm_runtime_disable() on device removal via devres mechanism. However, the remove callbacks explicitly call pm_runtime_disable() again, resulting in double pm_runtime_disable() calls. Fix by removing the redundant pm_runtime_disable() calls from remove functions, letting the devres framework handle it automatically. Fixes: 2ca0ec01d49c ("ASoC: mediatek: mt8195-afe-pcm: Simplify runtime PM during probe") Fixes: e1991d102bc2 ("ASoC: mediatek: mt8365: Add the AFE driver support") Signed-off-by: Haotian Zhang Link: https://patch.msgid.link/20251020170440.585-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/mediatek/mt8195/mt8195-afe-pcm.c | 1 - sound/soc/mediatek/mt8365/mt8365-afe-pcm.c | 1 - 2 files changed, 2 deletions(-) diff --git a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c index 5d025ad72263..c63b3444bc17 100644 --- a/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c +++ b/sound/soc/mediatek/mt8195/mt8195-afe-pcm.c @@ -3176,7 +3176,6 @@ static int mt8195_afe_pcm_dev_probe(struct platform_device *pdev) static void mt8195_afe_pcm_dev_remove(struct platform_device *pdev) { - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8195_afe_runtime_suspend(&pdev->dev); } diff --git a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c index 10793bbe9275..d48252cd96ac 100644 --- a/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c +++ b/sound/soc/mediatek/mt8365/mt8365-afe-pcm.c @@ -2238,7 +2238,6 @@ static void mt8365_afe_pcm_dev_remove(struct platform_device *pdev) mt8365_afe_disable_top_cg(afe, MT8365_TOP_CG_AFE); - pm_runtime_disable(&pdev->dev); if (!pm_runtime_status_suspended(&pdev->dev)) mt8365_afe_runtime_suspend(&pdev->dev); } From 6fab32bb6508abbb8b7b1c5498e44f0c32320ed5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 22 Oct 2025 16:36:25 +1100 Subject: [PATCH 0364/1075] MAINTAINERS: add Mark Brown as a linux-next maintainer Mark has been kindly helping fill in when I have been unavailable over the past several years. He has also put his hand up to take over linux-next maintenance when I finally decide to stop (which may be some time yet ;-) ). Signed-off-by: Stephen Rothwell Acked-by: Mark Brown Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 25463fa36508..5889df9de210 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14395,6 +14395,7 @@ F: tools/memory-model/ LINUX-NEXT TREE M: Stephen Rothwell +M: Mark Brown L: linux-next@vger.kernel.org S: Supported B: mailto:linux-next@vger.kernel.org and the appropriate development tree From 420c84c330d1688b8c764479e5738bbdbf0a33de Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Wed, 22 Oct 2025 10:40:07 +0800 Subject: [PATCH 0365/1075] usbnet: Prevents free active kevent The root cause of this issue are: 1. When probing the usbnet device, executing usbnet_link_change(dev, 0, 0); put the kevent work in global workqueue. However, the kevent has not yet been scheduled when the usbnet device is unregistered. Therefore, executing free_netdev() results in the "free active object (kevent)" error reported here. 2. Another factor is that when calling usbnet_disconnect()->unregister_netdev(), if the usbnet device is up, ndo_stop() is executed to cancel the kevent. However, because the device is not up, ndo_stop() is not executed. The solution to this problem is to cancel the kevent before executing free_netdev(). Fixes: a69e617e533e ("usbnet: Fix linkwatch use-after-free on disconnect") Reported-by: Sam Sun Closes: https://syzkaller.appspot.com/bug?extid=8bfd7bcc98f7300afb84 Signed-off-by: Lizhi Xu Link: https://patch.msgid.link/20251022024007.1831898-1-lizhi.xu@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index bf01f2728531..697cd9d866d3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1659,6 +1659,8 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); + cancel_work_sync(&dev->kevent); + while ((urb = usb_get_from_anchor(&dev->deferred))) { dev_kfree_skb(urb->context); kfree(urb->sg); From 1ab665817448c31f4758dce43c455bd4c5e460aa Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 22 Oct 2025 22:56:30 +0700 Subject: [PATCH 0366/1075] virtio-net: drop the multi-buffer XDP packet in zerocopy In virtio-net, we have not yet supported multi-buffer XDP packet in zerocopy mode when there is a binding XDP program. However, in that case, when receiving multi-buffer XDP packet, we skip the XDP program and return XDP_PASS. As a result, the packet is passed to normal network stack which is an incorrect behavior (e.g. a XDP program for packet count is installed, multi-buffer XDP packet arrives and does go through XDP program. As a result, the packet count does not increase but the packet is still received from network stack).This commit instead returns XDP_ABORTED in that case. Fixes: 99c861b44eb1 ("virtio_net: xsk: rx: support recv merge mode") Cc: stable@vger.kernel.org Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Signed-off-by: Bui Quang Minh Link: https://patch.msgid.link/20251022155630.49272-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a757cbcab87f..8e8a179aaa49 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1379,9 +1379,14 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); - /* TODO: support multi buffer. */ - if (prog && num_buf == 1) - ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); + if (prog) { + /* TODO: support multi buffer. */ + if (num_buf == 1) + ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, + stats); + else + ret = XDP_ABORTED; + } rcu_read_unlock(); switch (ret) { From 246aca5b2a2c4ad3e75c2eff616f5532019a92d2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 23 Oct 2025 19:43:49 +0900 Subject: [PATCH 0367/1075] firewire: core: fix __must_hold() annotation The variable name passed to __must_hold() annotation is invalid. This commit fixes it. Fixes: 420bd7068cbf ("firewire: core: use spin lock specific to transaction") Link: https://lore.kernel.org/r/20251023104349.415310-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index dd3656a0c1ff..c65f491c54d0 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -269,7 +269,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel, } static int allocate_tlabel(struct fw_card *card) -__must_hold(&card->transactions_lock) +__must_hold(&card->transactions.lock) { int tlabel; From df5192d9bb0e38bf831fb93e8026e346aa017ca8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 23 Oct 2025 13:06:26 -0500 Subject: [PATCH 0368/1075] PCI/ASPM: Enable only L0s and L1 for devicetree platforms f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") enabled Clock Power Management and L1 PM Substates, but those features depend on CLKREQ# and possibly other device-specific configuration. We don't know whether CLKREQ# is supported, so we shouldn't blindly enable Clock PM and L1 PM Substates. Enable only ASPM L0s and L1, and only when both ends of the link advertise support for them. Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") Reported-by: Christian Zigotzky Link: https://lore.kernel.org/r/db5c95a1-cf3e-46f9-8045-a1b04908051a@xenosoft.de/ Reported-by: FUKAUMI Naoki Closes: https://lore.kernel.org/r/22594781424C5C98+22cb5d61-19b1-4353-9818-3bb2b311da0b@radxa.com/ Reported-by: Herve Codina Link: https://lore.kernel.org/r/20251015101304.3ec03e6b@bootlin.com/ Reported-by: Diederik de Haas Closes: https://lore.kernel.org/r/DDJXHRIRGTW9.GYC2ULZ5WQAL@cknow-tech.com/ Signed-off-by: Bjorn Helgaas Tested-by: FUKAUMI Naoki Tested-by: Diederik de Haas Acked-by: Dragan Simic Link: https://patch.msgid.link/20251023180645.1304701-1-helgaas@kernel.org --- drivers/pci/pcie/aspm.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 7cc8281e7011..79b965158473 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -243,8 +243,7 @@ struct pcie_link_state { /* Clock PM state */ u32 clkpm_capable:1; /* Clock PM capable? */ u32 clkpm_enabled:1; /* Current Clock PM state */ - u32 clkpm_default:1; /* Default Clock PM state by BIOS or - override */ + u32 clkpm_default:1; /* Default Clock PM state by BIOS */ u32 clkpm_disable:1; /* Clock PM disabled */ }; @@ -376,18 +375,6 @@ static void pcie_set_clkpm(struct pcie_link_state *link, int enable) pcie_set_clkpm_nocheck(link, enable); } -static void pcie_clkpm_override_default_link_state(struct pcie_link_state *link, - int enabled) -{ - struct pci_dev *pdev = link->downstream; - - /* For devicetree platforms, enable ClockPM by default */ - if (of_have_populated_dt() && !enabled) { - link->clkpm_default = 1; - pci_info(pdev, "ASPM: DT platform, enabling ClockPM\n"); - } -} - static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) { int capable = 1, enabled = 1; @@ -410,7 +397,6 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) } link->clkpm_enabled = enabled; link->clkpm_default = enabled; - pcie_clkpm_override_default_link_state(link, enabled); link->clkpm_capable = capable; link->clkpm_disable = blacklist ? 1 : 0; } @@ -811,19 +797,17 @@ static void pcie_aspm_override_default_link_state(struct pcie_link_state *link) struct pci_dev *pdev = link->downstream; u32 override; - /* For devicetree platforms, enable all ASPM states by default */ + /* For devicetree platforms, enable L0s and L1 by default */ if (of_have_populated_dt()) { - link->aspm_default = PCIE_LINK_STATE_ASPM_ALL; + if (link->aspm_support & PCIE_LINK_STATE_L0S) + link->aspm_default |= PCIE_LINK_STATE_L0S; + if (link->aspm_support & PCIE_LINK_STATE_L1) + link->aspm_default |= PCIE_LINK_STATE_L1; override = link->aspm_default & ~link->aspm_enabled; if (override) - pci_info(pdev, "ASPM: DT platform, enabling%s%s%s%s%s%s%s\n", - FLAG(override, L0S_UP, " L0s-up"), - FLAG(override, L0S_DW, " L0s-dw"), - FLAG(override, L1, " L1"), - FLAG(override, L1_1, " ASPM-L1.1"), - FLAG(override, L1_2, " ASPM-L1.2"), - FLAG(override, L1_1_PCIPM, " PCI-PM-L1.1"), - FLAG(override, L1_2_PCIPM, " PCI-PM-L1.2")); + pci_info(pdev, "ASPM: default states%s%s\n", + FLAG(override, L0S, " L0s"), + FLAG(override, L1, " L1")); } } From 6f1cbf6d6fd13fc169dde14e865897924cdc4bbd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 24 Oct 2025 09:34:59 +0800 Subject: [PATCH 0369/1075] io_uring: fix buffer auto-commit for multishot uring_cmd Commit 620a50c92700 ("io_uring: uring_cmd: add multishot support") added multishot uring_cmd support with explicit buffer upfront commit via io_uring_mshot_cmd_post_cqe(). However, the buffer selection path in io_ring_buffer_select() was auto-committing buffers for non-pollable files, which conflicts with uring_cmd's explicit upfront commit model. This way consumes the whole selected buffer immediately, and causes failure on the following buffer selection. Fix this by checking uring_cmd to identify operations that handle buffer commit explicitly, and skip auto-commit for these operations. Cc: Caleb Sander Mateos Fixes: 620a50c92700 ("io_uring: uring_cmd: add multishot support") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index aad655e38672..a727e020fe03 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, return 1; } +static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) +{ + /* + * If we came in unlocked, we have no choice but to consume the + * buffer here, otherwise nothing ensures that the buffer won't + * get used by others. This does mean it'll be pinned until the + * IO completes, coming in unlocked means we're being called from + * io-wq context and there may be further retries in async hybrid + * mode. For the locked case, the caller must call commit when + * the transfer completes (or if we get -EAGAIN and must poll of + * retry). + */ + if (issue_flags & IO_URING_F_UNLOCKED) + return true; + + /* uring_cmd commits kbuf upfront, no need to auto-commit */ + if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD) + return true; + return false; +} + static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) @@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, sel.buf_list = bl; sel.addr = u64_to_user_ptr(buf->addr); - if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { - /* - * If we came in unlocked, we have no choice but to consume the - * buffer here, otherwise nothing ensures that the buffer won't - * get used by others. This does mean it'll be pinned until the - * IO completes, coming in unlocked means we're being called from - * io-wq context and there may be further retries in async hybrid - * mode. For the locked case, the caller must call commit when - * the transfer completes (or if we get -EAGAIN and must poll of - * retry). - */ + if (io_should_commit(req, issue_flags)) { io_kbuf_commit(req, sel.buf_list, *len, 1); sel.buf_list = NULL; } From dd6940f5c7dbee7ae70708f4c8967c3c8cb1d965 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 15 Oct 2025 17:05:27 +0200 Subject: [PATCH 0370/1075] smb: server: let free_transport() wait for SMBDIRECT_SOCKET_DISCONNECTED We should wait for the rdma_cm to become SMBDIRECT_SOCKET_DISCONNECTED! At least on the client side (with similar code) wait_event_interruptible() often returns with -ERESTARTSYS instead of waiting for SMBDIRECT_SOCKET_DISCONNECTED. We should use wait_event() here too, which makes the code be identical in client and server, which will help when moving to common functions. Fixes: b31606097de8 ("smb: server: move smb_direct_disconnect_rdma_work() into free_transport()") Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8aaa950a9449..89b02efdba0c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -451,11 +451,10 @@ static void free_transport(struct smb_direct_transport *t) struct smbdirect_recv_io *recvmsg; disable_work_sync(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smb_direct_disconnect_rdma_work(&sc->disconnect_work); - wait_event_interruptible(sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); - } + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); /* * Wake up all waiters in all wait queues From 5127be409c6c3815c4a7d8f6d88043e44f9b9543 Mon Sep 17 00:00:00 2001 From: Nitin Rawat Date: Sun, 12 Oct 2025 23:08:28 +0530 Subject: [PATCH 0371/1075] scsi: ufs: ufs-qcom: Fix UFS OCP issue during UFS power down (PC=3) According to UFS specifications, the power-off sequence for a UFS device includes: - Sending an SSU command with Power_Condition=3 and await a response. - Asserting RST_N low. - Turning off REF_CLK. - Turning off VCC. - Turning off VCCQ/VCCQ2. As part of ufs shutdown, after the SSU command completion, asserting hardware reset (HWRST) triggers the device firmware to wake up and execute its reset routine. This routine initializes hardware blocks and takes a few milliseconds to complete. During this time, the ICCQ draws a large current. This large ICCQ current may cause issues for the regulator which is supplying power to UFS, because the turn off request from UFS driver to the regulator framework will be immediately followed by low power mode(LPM) request by regulator framework. This is done by framework because UFS which is the only client is requesting for disable. So if the rail is still in the process of shutting down while ICCQ exceeds LPM current thresholds, and LPM mode is activated in hardware during this state, it may trigger an overcurrent protection (OCP) fault in the regulator. To prevent this, a 10ms delay is added after asserting HWRST. This allows the reset operation to complete while power rails remain active and in high-power mode. Currently there is no way for Host to query whether the reset is completed or not and hence this the delay is based on experiments with Qualcomm UFS controllers across multiple UFS vendors. Signed-off-by: Nitin Rawat Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20251012173828.9880-1-nitin.rawat@oss.qualcomm.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufs-qcom.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 3e83dc51d538..eba0e6617483 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -740,8 +740,21 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, /* reset the connected UFS device during power down */ - if (ufs_qcom_is_link_off(hba) && host->device_reset) + if (ufs_qcom_is_link_off(hba) && host->device_reset) { ufs_qcom_device_reset_ctrl(hba, true); + /* + * After sending the SSU command, asserting the rst_n + * line causes the device firmware to wake up and + * execute its reset routine. + * + * During this process, the device may draw current + * beyond the permissible limit for low-power mode (LPM). + * A 10ms delay, based on experimental observations, + * allows the UFS device to complete its hardware reset + * before transitioning the power rail to LPM. + */ + usleep_range(10000, 11000); + } return ufs_qcom_ice_suspend(host); } From c74dc8ab47c1ec3927f63ca83b542c363249b3d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Oct 2025 13:00:53 -0700 Subject: [PATCH 0372/1075] scsi: ufs: core: Fix a race condition related to the "hid" attribute group ufs_sysfs_add_nodes() is called concurrently with ufs_get_device_desc(). This may cause the following code to be called before ufs_sysfs_add_nodes(): sysfs_update_group(&hba->dev->kobj, &ufs_sysfs_hid_group); If this happens, ufs_sysfs_add_nodes() triggers a kernel warning and fails. Fix this by calling ufs_sysfs_add_nodes() before SCSI LUNs are scanned since the sysfs_update_group() call happens from the context of thread that executes ufshcd_async_scan(). This patch fixes the following kernel warning: sysfs: cannot create duplicate filename '/devices/platform/3c2d0000.ufs/hid' Workqueue: async async_run_entry_fn Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0x104 dump_stack+0x18/0x3c sysfs_warn_dup+0x6c/0xc8 internal_create_group+0x1c8/0x504 sysfs_create_groups+0x38/0x9c ufs_sysfs_add_nodes+0x20/0x58 ufshcd_init+0x1114/0x134c ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __device_attach_driver+0x15c/0x1f4 bus_for_each_drv+0x10c/0x168 __device_attach_async_helper+0x80/0xf8 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 ufshcd 3c2d0000.ufs: ufs_sysfs_add_nodes: sysfs groups creation failed (err = -17) Cc: Daniel Lee Fixes: bb7663dec67b ("scsi: ufs: sysfs: Make HID attributes visible") Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20251014200118.3390839-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9ca27de4767a..322641457a2d 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10891,8 +10891,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) if (err) goto out_disable; - async_schedule(ufshcd_async_scan, hba); ufs_sysfs_add_nodes(hba->dev); + async_schedule(ufshcd_async_scan, hba); device_enable_async_suspend(dev); ufshcd_pm_qos_init(hba); From c0e37ac6a5d4c4bc33b9c4408d22714fe370a1b0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 14 Oct 2025 13:00:54 -0700 Subject: [PATCH 0373/1075] scsi: ufs: core: Reduce link startup failure logging Some systems, e.g. Rock 4D, have a pluggable UFS module. Link startup fails systematically on these systems. If no UFS module has been plugged in, more than fourty lines are logged after the "link startup failed" message. Avoid this by reducing link startup failure logging. An intended side effect of this patch is that scsi_host_busy() is not called before scsi_add_host() is called. Commit 995412e23bb2 ("blk-mq: Replace tags->lock with SRCU for tag iterators") introduced a regression - the warning shown below is triggered during every boot. This patch fixes that regression. Call trace: __srcu_read_lock+0x30/0x80 (P) blk_mq_tagset_busy_iter+0x44/0x300 scsi_host_busy+0x38/0x70 ufshcd_print_host_state+0x34/0x1bc ufshcd_link_startup.constprop.0+0xe4/0x2e0 ufshcd_init+0x944/0xf80 ufshcd_pltfrm_init+0x504/0x820 ufs_rockchip_probe+0x2c/0x88 platform_probe+0x5c/0xa4 really_probe+0xc0/0x38c __driver_probe_device+0x7c/0x150 driver_probe_device+0x40/0x120 __driver_attach+0xc8/0x1e0 bus_for_each_dev+0x7c/0xdc driver_attach+0x24/0x30 bus_add_driver+0x110/0x230 driver_register+0x68/0x130 __platform_driver_register+0x20/0x2c ufs_rockchip_pltform_init+0x1c/0x28 do_one_initcall+0x60/0x1e0 kernel_init_freeable+0x248/0x2c4 kernel_init+0x20/0x140 ret_from_fork+0x10/0x20 Reported-by: Sebastian Reichel Closes: https://lore.kernel.org/linux-block/pnezafputodmqlpumwfbn644ohjybouveehcjhz2hmhtcf2rka@sdhoiivync4y/ Signed-off-by: Bart Van Assche Link: https://patch.msgid.link/20251014200118.3390839-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 322641457a2d..5d6297aa5c28 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5131,12 +5131,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba) ufshcd_readl(hba, REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER); ret = ufshcd_make_hba_operational(hba); out: - if (ret) { + if (ret) dev_err(hba->dev, "link startup failed %d\n", ret); - ufshcd_print_host_state(hba); - ufshcd_print_pwr_info(hba); - ufshcd_print_evt_hist(hba); - } return ret; } From ce29214ada6d08dbde1eeb5a69c3b09ddf3da146 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:36 -0700 Subject: [PATCH 0374/1075] drm/xe: Check return value of GGTT workqueue allocation Workqueue allocation can fail, so check the return value of the GGTT workqueue allocation and fail driver initialization if the allocation fails. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-2-matthew.brost@intel.com (cherry picked from commit 1f1314e8e71385bae319e43082b798c11f6648bc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7fdd0a97a628..5edc0cad47e2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -292,6 +292,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); From 58764259ebe0c9efd569194444629f6b26f86583 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 8 Oct 2025 01:41:44 +0200 Subject: [PATCH 0375/1075] ACPI: fan: Use ACPI handle when retrieving _FST Usage of the ACPI device should be phased out in the future, as the driver itself is now using the platform bus. Replace any usage of struct acpi_device in acpi_fan_get_fst() to allow users to drop usage of struct acpi_device. Also extend the integer check to all three package elements. Signed-off-by: Armin Wolf Link: https://patch.msgid.link/20251007234149.2769-2-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki --- drivers/acpi/fan.h | 3 ++- drivers/acpi/fan_attr.c | 2 +- drivers/acpi/fan_core.c | 34 ++++++++++++++++++++++------------ drivers/acpi/fan_hwmon.c | 3 +-- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index 8a28a72a7c6a..d39bb6fd1326 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -49,6 +49,7 @@ struct acpi_fan_fst { }; struct acpi_fan { + acpi_handle handle; bool acpi4; bool has_fst; struct acpi_fan_fif fif; @@ -59,7 +60,7 @@ struct acpi_fan { struct device_attribute fine_grain_control; }; -int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst); +int acpi_fan_get_fst(acpi_handle handle, struct acpi_fan_fst *fst); int acpi_fan_create_attributes(struct acpi_device *device); void acpi_fan_delete_attributes(struct acpi_device *device); diff --git a/drivers/acpi/fan_attr.c b/drivers/acpi/fan_attr.c index c1afb7b5ed3d..9b7fa52f3c2a 100644 --- a/drivers/acpi/fan_attr.c +++ b/drivers/acpi/fan_attr.c @@ -55,7 +55,7 @@ static ssize_t show_fan_speed(struct device *dev, struct device_attribute *attr, struct acpi_fan_fst fst; int status; - status = acpi_fan_get_fst(acpi_dev, &fst); + status = acpi_fan_get_fst(acpi_dev->handle, &fst); if (status) return status; diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index 04ff608f2ff0..ea2c646c470c 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -44,25 +44,30 @@ static int fan_get_max_state(struct thermal_cooling_device *cdev, unsigned long return 0; } -int acpi_fan_get_fst(struct acpi_device *device, struct acpi_fan_fst *fst) +int acpi_fan_get_fst(acpi_handle handle, struct acpi_fan_fst *fst) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; acpi_status status; int ret = 0; - status = acpi_evaluate_object(device->handle, "_FST", NULL, &buffer); - if (ACPI_FAILURE(status)) { - dev_err(&device->dev, "Get fan state failed\n"); - return -ENODEV; - } + status = acpi_evaluate_object(handle, "_FST", NULL, &buffer); + if (ACPI_FAILURE(status)) + return -EIO; obj = buffer.pointer; - if (!obj || obj->type != ACPI_TYPE_PACKAGE || - obj->package.count != 3 || - obj->package.elements[1].type != ACPI_TYPE_INTEGER) { - dev_err(&device->dev, "Invalid _FST data\n"); - ret = -EINVAL; + if (!obj) + return -ENODATA; + + if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 3) { + ret = -EPROTO; + goto err; + } + + if (obj->package.elements[0].type != ACPI_TYPE_INTEGER || + obj->package.elements[1].type != ACPI_TYPE_INTEGER || + obj->package.elements[2].type != ACPI_TYPE_INTEGER) { + ret = -EPROTO; goto err; } @@ -81,7 +86,7 @@ static int fan_get_state_acpi4(struct acpi_device *device, unsigned long *state) struct acpi_fan_fst fst; int status, i; - status = acpi_fan_get_fst(device, &fst); + status = acpi_fan_get_fst(device->handle, &fst); if (status) return status; @@ -311,11 +316,16 @@ static int acpi_fan_probe(struct platform_device *pdev) struct acpi_device *device = ACPI_COMPANION(&pdev->dev); char *name; + if (!device) + return -ENODEV; + fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL); if (!fan) { dev_err(&device->dev, "No memory for fan\n"); return -ENOMEM; } + + fan->handle = device->handle; device->driver_data = fan; platform_set_drvdata(pdev, fan); diff --git a/drivers/acpi/fan_hwmon.c b/drivers/acpi/fan_hwmon.c index e8d90605106e..4209a9923efc 100644 --- a/drivers/acpi/fan_hwmon.c +++ b/drivers/acpi/fan_hwmon.c @@ -93,13 +93,12 @@ static umode_t acpi_fan_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_ static int acpi_fan_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct acpi_device *adev = to_acpi_device(dev->parent); struct acpi_fan *fan = dev_get_drvdata(dev); struct acpi_fan_fps *fps; struct acpi_fan_fst fst; int ret; - ret = acpi_fan_get_fst(adev, &fst); + ret = acpi_fan_get_fst(fan->handle, &fst); if (ret < 0) return ret; From d91a1d129b63614fa4c2e45e60918409ce36db7e Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Wed, 8 Oct 2025 01:41:46 +0200 Subject: [PATCH 0376/1075] ACPI: fan: Use platform device for devres-related actions Device-managed resources are cleaned up when the driver unbinds from the underlying device. In our case this is the platform device as this driver is a platform driver. Registering device-managed resources on the associated ACPI device will thus result in a resource leak when this driver unbinds. Ensure that any device-managed resources are only registered on the platform device to ensure that they are cleaned up during removal. Fixes: 35c50d853adc ("ACPI: fan: Add hwmon support") Signed-off-by: Armin Wolf Cc: 6.11+ # 6.11+ Link: https://patch.msgid.link/20251007234149.2769-4-W_Armin@gmx.de Signed-off-by: Rafael J. Wysocki --- drivers/acpi/fan.h | 4 ++-- drivers/acpi/fan_core.c | 2 +- drivers/acpi/fan_hwmon.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index d39bb6fd1326..bedbab0e8e4e 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -65,9 +65,9 @@ int acpi_fan_create_attributes(struct acpi_device *device); void acpi_fan_delete_attributes(struct acpi_device *device); #if IS_REACHABLE(CONFIG_HWMON) -int devm_acpi_fan_create_hwmon(struct acpi_device *device); +int devm_acpi_fan_create_hwmon(struct device *dev); #else -static inline int devm_acpi_fan_create_hwmon(struct acpi_device *device) { return 0; }; +static inline int devm_acpi_fan_create_hwmon(struct device *dev) { return 0; }; #endif #endif diff --git a/drivers/acpi/fan_core.c b/drivers/acpi/fan_core.c index ea2c646c470c..46e7fe7a506d 100644 --- a/drivers/acpi/fan_core.c +++ b/drivers/acpi/fan_core.c @@ -347,7 +347,7 @@ static int acpi_fan_probe(struct platform_device *pdev) } if (fan->has_fst) { - result = devm_acpi_fan_create_hwmon(device); + result = devm_acpi_fan_create_hwmon(&pdev->dev); if (result) return result; diff --git a/drivers/acpi/fan_hwmon.c b/drivers/acpi/fan_hwmon.c index 4209a9923efc..4b2c2007f2d7 100644 --- a/drivers/acpi/fan_hwmon.c +++ b/drivers/acpi/fan_hwmon.c @@ -166,12 +166,12 @@ static const struct hwmon_chip_info acpi_fan_hwmon_chip_info = { .info = acpi_fan_hwmon_info, }; -int devm_acpi_fan_create_hwmon(struct acpi_device *device) +int devm_acpi_fan_create_hwmon(struct device *dev) { - struct acpi_fan *fan = acpi_driver_data(device); + struct acpi_fan *fan = dev_get_drvdata(dev); struct device *hdev; - hdev = devm_hwmon_device_register_with_info(&device->dev, "acpi_fan", fan, - &acpi_fan_hwmon_chip_info, NULL); + hdev = devm_hwmon_device_register_with_info(dev, "acpi_fan", fan, &acpi_fan_hwmon_chip_info, + NULL); return PTR_ERR_OR_ZERO(hdev); } From 7df699c2132f36359f8f79e6a163c3b3fe0b0e3d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 10 Sep 2025 12:47:02 +0200 Subject: [PATCH 0377/1075] media: v4l2-subdev / pdx86: int3472: Use "privacy" as con_id for the privacy LED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During DT-binding review for extending the V4L2 camera sensor privacy LED support to systems using devicetree, it has come up that having a "-led" suffix for the LED name / con_id is undesirable since it already is clear that it is a LED. Drop the "-led" suffix from the con_id in both the lookup table in the int3472 code, as well as from the con_id led_get() argument in the v4l2-subdev code. Signed-off-by: Hans de Goede Acked-by: Ilpo Järvinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 2 +- drivers/platform/x86/intel/int3472/led.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 1da953629010..25e66bf18f5f 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -2608,7 +2608,7 @@ EXPORT_SYMBOL_GPL(v4l2_subdev_is_streaming); int v4l2_subdev_get_privacy_led(struct v4l2_subdev *sd) { #if IS_REACHABLE(CONFIG_LEDS_CLASS) - sd->privacy_led = led_get(sd->dev, "privacy-led"); + sd->privacy_led = led_get(sd->dev, "privacy"); if (IS_ERR(sd->privacy_led) && PTR_ERR(sd->privacy_led) != -ENOENT) return dev_err_probe(sd->dev, PTR_ERR(sd->privacy_led), "getting privacy LED\n"); diff --git a/drivers/platform/x86/intel/int3472/led.c b/drivers/platform/x86/intel/int3472/led.c index f1d6d7b0cb75..b1d84b968112 100644 --- a/drivers/platform/x86/intel/int3472/led.c +++ b/drivers/platform/x86/intel/int3472/led.c @@ -43,7 +43,7 @@ int skl_int3472_register_pled(struct int3472_discrete_device *int3472, struct gp int3472->pled.lookup.provider = int3472->pled.name; int3472->pled.lookup.dev_id = int3472->sensor_name; - int3472->pled.lookup.con_id = "privacy-led"; + int3472->pled.lookup.con_id = "privacy"; led_add_lookup(&int3472->pled.lookup); return 0; From 758dbc756aad429da11c569c0d067f7fd032bcf7 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Tue, 21 Oct 2025 10:36:17 +0000 Subject: [PATCH 0378/1075] media: uvcvideo: Use heuristic to find stream entity Some devices, like the Grandstream GUV3100 webcam, have an invalid UVC descriptor where multiple entities share the same ID, this is invalid and makes it impossible to make a proper entity tree without heuristics. We have recently introduced a change in the way that we handle invalid entities that has caused a regression on broken devices. Implement a new heuristic to handle these devices properly. Reported-by: Angel4005 Closes: https://lore.kernel.org/linux-media/CAOzBiVuS7ygUjjhCbyWg-KiNx+HFTYnqH5+GJhd6cYsNLT=DaA@mail.gmail.com/ Fixes: 0e2ee70291e6 ("media: uvcvideo: Mark invalid entities with id UVC_INVALID_ENTITY_ID") Cc: stable@vger.kernel.org Signed-off-by: Ricardo Ribalda Reviewed-by: Hans de Goede Signed-off-by: Hans Verkuil --- drivers/media/usb/uvc/uvc_driver.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index fb6afb8e84f0..ee4f54d68349 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -167,13 +167,26 @@ static struct uvc_entity *uvc_entity_by_reference(struct uvc_device *dev, static struct uvc_streaming *uvc_stream_by_id(struct uvc_device *dev, int id) { - struct uvc_streaming *stream; + struct uvc_streaming *stream, *last_stream; + unsigned int count = 0; list_for_each_entry(stream, &dev->streams, list) { + count += 1; + last_stream = stream; if (stream->header.bTerminalLink == id) return stream; } + /* + * If the streaming entity is referenced by an invalid ID, notify the + * user and use heuristics to guess the correct entity. + */ + if (count == 1 && id == UVC_INVALID_ENTITY_ID) { + dev_warn(&dev->intf->dev, + "UVC non compliance: Invalid USB header. The streaming entity has an invalid ID, guessing the correct one."); + return last_stream; + } + return NULL; } From 27afd6e066cfd80ddbe22a4a11b99174ac89cced Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Oct 2025 16:26:34 +0200 Subject: [PATCH 0379/1075] media: videobuf2: forbid remove_bufs when legacy fileio is active vb2_ioctl_remove_bufs() call manipulates queue internal buffer list, potentially overwriting some pointers used by the legacy fileio access mode. Forbid that ioctl when fileio is active to protect internal queue state between subsequent read/write calls. CC: stable@vger.kernel.org Fixes: a3293a85381e ("media: v4l2: Add REMOVE_BUFS ioctl") Reported-by: Shuangpeng Bai Closes: https://lore.kernel.org/linux-media/5317B590-AAB4-4F17-8EA1-621965886D49@psu.edu/ Signed-off-by: Marek Szyprowski Signed-off-by: Hans Verkuil --- drivers/media/common/videobuf2/videobuf2-v4l2.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index d911021c1bb0..83862d57b126 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -1010,6 +1010,11 @@ int vb2_ioctl_remove_bufs(struct file *file, void *priv, if (vb2_queue_is_busy(vdev->queue, file)) return -EBUSY; + if (vb2_fileio_is_active(vdev->queue)) { + dprintk(vdev->queue, 1, "file io in progress\n"); + return -EBUSY; + } + return vb2_core_remove_bufs(vdev->queue, d->index, d->count); } EXPORT_SYMBOL_GPL(vb2_ioctl_remove_bufs); From 5d7e45dd670e42df4836afeaa9baf9d41ca4b434 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:48:59 +0100 Subject: [PATCH 0380/1075] genirq/chip: Add buslock back in to irq_set_handler() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: 5cd05f3e2315 ("genirq/chip: Rework irq_set_handler() variants") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-2-ckeepax@opensource.cirrus.com --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3ffa0d80ddd1..d1917b28761a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1030,7 +1030,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name) { - scoped_irqdesc_get_and_lock(irq, 0) + scoped_irqdesc_get_and_buslock(irq, 0) __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name); } EXPORT_SYMBOL_GPL(__irq_set_handler); From 56363e25f79fe83e63039c5595b8cd9814173d37 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:49:00 +0100 Subject: [PATCH 0381/1075] genirq/manage: Add buslock back in to __disable_irq_nosync() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: 1b7444446724 ("genirq/manage: Rework __disable_irq_nosync()") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-3-ckeepax@opensource.cirrus.com --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c94837382037..7d68fb5dc242 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -659,7 +659,7 @@ void __disable_irq(struct irq_desc *desc) static int __disable_irq_nosync(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { __disable_irq(scoped_irqdesc); return 0; } From ef3330b99c01bda53f2a189b58bed8f6b7397f28 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:49:01 +0100 Subject: [PATCH 0382/1075] genirq/manage: Add buslock back in to enable_irq() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: bddd10c55407 ("genirq/manage: Rework enable_irq()") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-4-ckeepax@opensource.cirrus.com --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7d68fb5dc242..400856abf672 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -789,7 +789,7 @@ void __enable_irq(struct irq_desc *desc) */ void enable_irq(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { struct irq_desc *desc = scoped_irqdesc; if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) From 7f434e1d9a17ca5f567c9796c9c105a65c18db9a Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Thu, 23 Oct 2025 22:33:13 +0800 Subject: [PATCH 0383/1075] slab: Fix obj_ext mistakenly considered NULL due to race condition If two competing threads enter alloc_slab_obj_exts(), and the one that allocates the vector wins the cmpxchg(), the other thread that failed allocation mistakenly assumes that slab->obj_exts is still empty due to its own allocation failure. This will then trigger warnings with CONFIG_MEM_ALLOC_PROFILING_DEBUG checks in the subsequent free path. Therefore, let's check the result of cmpxchg() to see if marking the allocation as failed was successful. If it wasn't, check whether the winning side has succeeded its allocation (it might have been also marking it as failed) and if yes, return success. Suggested-by: Harry Yoo Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally") Cc: Signed-off-by: Hao Ge Link: https://patch.msgid.link/20251023143313.1327968-1-hao.ge@linux.dev Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 87a1d2f9de0d..d4367f25b20d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2052,9 +2052,9 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) } } -static inline void mark_failed_objexts_alloc(struct slab *slab) +static inline bool mark_failed_objexts_alloc(struct slab *slab) { - cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL); + return cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL) == 0; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, @@ -2076,7 +2076,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} -static inline void mark_failed_objexts_alloc(struct slab *slab) {} +static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, struct slabobj_ext *vec, unsigned int objects) {} @@ -2124,8 +2124,14 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, slab_nid(slab)); } if (!vec) { - /* Mark vectors which failed to allocate */ - mark_failed_objexts_alloc(slab); + /* + * Try to mark vectors which failed to allocate. + * If this operation fails, there may be a racing process + * that has already completed the allocation. + */ + if (!mark_failed_objexts_alloc(slab) && + slab_obj_exts(slab)) + return 0; return -ENOMEM; } From 7209ff310083315386570bf8d001a0845fe7ab8c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:41:01 +0200 Subject: [PATCH 0384/1075] of/irq: Export of_msi_xlate() for module usage of_msi_xlate() is required by drivers that can be configured as modular, export the symbol. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Rob Herring Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251021124103.198419-4-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index ee7d5f0842e8..1cd93549d093 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -733,6 +733,7 @@ u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) } return id_out; } +EXPORT_SYMBOL_GPL(of_msi_xlate); /** * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain From 840bc67cf01399b756e7714dc2f80eb9d40a0541 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 22 Oct 2025 14:25:58 +0200 Subject: [PATCH 0385/1075] s390: Update defconfigs Signed-off-by: Heiko Carstens --- arch/s390/configs/debug_defconfig | 14 +++++++++----- arch/s390/configs/defconfig | 14 +++++++++----- arch/s390/configs/zfcpdump_defconfig | 1 - 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index b31c1df90257..8433f769f7e1 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -101,6 +101,7 @@ CONFIG_SLUB_STATS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -123,12 +124,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -472,6 +473,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -654,9 +656,12 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_XFS_DEBUG=y CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y @@ -666,7 +671,6 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_BTRFS_DEBUG=y CONFIG_BTRFS_ASSERT=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 161dad7ef211..4414dabd04a6 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -94,6 +94,7 @@ CONFIG_SLAB_BUCKETS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y +CONFIG_PERSISTENT_HUGE_ZERO_FOLIO=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 @@ -114,12 +115,12 @@ CONFIG_TLS_DEVICE=y CONFIG_TLS_TOE=y CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_XDP_SOCKETS=y -CONFIG_XDP_SOCKETS_DIAG=m -CONFIG_DIBS=y -CONFIG_DIBS_LO=y CONFIG_SMC=m CONFIG_SMC_DIAG=m +CONFIG_DIBS=y +CONFIG_DIBS_LO=y +CONFIG_XDP_SOCKETS=y +CONFIG_XDP_SOCKETS_DIAG=m CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y @@ -462,6 +463,7 @@ CONFIG_SCSI_DH_EMC=m CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y +CONFIG_MD_LLBITMAP=y # CONFIG_MD_BITMAP_FILE is not set CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m @@ -644,16 +646,18 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_JFS_STATISTICS=y CONFIG_XFS_FS=y +CONFIG_XFS_SUPPORT_V4=y +CONFIG_XFS_SUPPORT_ASCII_CI=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_XFS_RT=y +# CONFIG_XFS_ONLINE_SCRUB is not set CONFIG_GFS2_FS=m CONFIG_GFS2_FS_LOCKING_DLM=y CONFIG_OCFS2_FS=m CONFIG_BTRFS_FS=y CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m -CONFIG_FS_DAX=y CONFIG_EXPORTFS_BLOCK_OPS=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_VERITY=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index ed0b137353ad..b5478267d6a7 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -33,7 +33,6 @@ CONFIG_NET=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y -# CONFIG_DCSSBLK is not set # CONFIG_DASD is not set CONFIG_ENCLOSURE_SERVICES=y CONFIG_SCSI=y From b45873c3f09153d1ad9b3a7bf9e5c0b0387fd2ea Mon Sep 17 00:00:00 2001 From: Farhan Ali Date: Wed, 22 Oct 2025 09:47:26 -0700 Subject: [PATCH 0386/1075] s390/pci: Restore IRQ unconditionally for the zPCI device Commit c1e18c17bda6 ("s390/pci: add zpci_set_irq()/zpci_clear_irq()"), introduced the zpci_set_irq() and zpci_clear_irq(), to be used while resetting a zPCI device. Commit da995d538d3a ("s390/pci: implement reset_slot for hotplug slot"), mentions zpci_clear_irq() being called in the path for zpci_hot_reset_device(). But that is not the case anymore and these functions are not called outside of this file. Instead zpci_hot_reset_device() relies on zpci_disable_device() also clearing the IRQs, but misses to reset the zdev->irqs_registered flag. However after a CLP disable/enable reset, the device's IRQ are unregistered, but the flag zdev->irq_registered does not get cleared. It creates an inconsistent state and so arch_restore_msi_irqs() doesn't correctly restore the device's IRQ. This becomes a problem when a PCI driver tries to restore the state of the device through pci_restore_state(). Restore IRQ unconditionally for the device and remove the irq_registered flag as its redundant. Fixes: c1e18c17bda6 ("s390/pci: add zpci_set_irq()/zpci_clear_irq()") Cc: stable@vger.kernnel.org Reviewed-by: Niklas Schnelle Reviewed-by: Matthew Rosato Signed-off-by: Farhan Ali Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pci.h | 1 - arch/s390/pci/pci_irq.c | 9 +-------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 6890925d5587..a32f465ecf73 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -145,7 +145,6 @@ struct zpci_dev { u8 has_resources : 1; u8 is_physfn : 1; u8 util_str_avail : 1; - u8 irqs_registered : 1; u8 tid_avail : 1; u8 rtr_avail : 1; /* Relaxed translation allowed */ unsigned int devfn; /* DEVFN part of the RID*/ diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 84482a921332..e73be96ce5fe 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -107,9 +107,6 @@ static int zpci_set_irq(struct zpci_dev *zdev) else rc = zpci_set_airq(zdev); - if (!rc) - zdev->irqs_registered = 1; - return rc; } @@ -123,9 +120,6 @@ static int zpci_clear_irq(struct zpci_dev *zdev) else rc = zpci_clear_airq(zdev); - if (!rc) - zdev->irqs_registered = 0; - return rc; } @@ -427,8 +421,7 @@ bool arch_restore_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); - if (!zdev->irqs_registered) - zpci_set_irq(zdev); + zpci_set_irq(zdev); return true; } From 09b0cd1297b4dbfe736aeaa0ceeab2265f47f772 Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Mon, 29 Sep 2025 05:30:17 +0000 Subject: [PATCH 0387/1075] Bluetooth: hci_sync: fix race in hci_cmd_sync_dequeue_once hci_cmd_sync_dequeue_once() does lookup and then cancel the entry under two separate lock sections. Meanwhile, hci_cmd_sync_work() can also delete the same entry, leading to double list_del() and "UAF". Fix this by holding cmd_sync_work_lock across both lookup and cancel, so that the entry cannot be removed concurrently. Fixes: 505ea2b29592 ("Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue") Reported-by: Cen Zhang Signed-off-by: Cen Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eefdb6134ca5..d160e5e1fe8a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -863,11 +863,17 @@ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, { struct hci_cmd_sync_work_entry *entry; - entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); - if (!entry) - return false; + mutex_lock(&hdev->cmd_sync_work_lock); - hci_cmd_sync_cancel_entry(hdev, entry); + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) { + mutex_unlock(&hdev->cmd_sync_work_lock); + return false; + } + + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + + mutex_unlock(&hdev->cmd_sync_work_lock); return true; } From f0c200a4a537f8f374584a974518b0ce69eda76c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 26 Sep 2025 11:48:50 -0400 Subject: [PATCH 0388/1075] Bluetooth: ISO: Fix BIS connection dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 9b263d061e05..954e1916506b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2032,7 +2032,7 @@ static void iso_conn_ready(struct iso_conn *conn) */ if (!bacmp(&hcon->dst, BDADDR_ANY)) { bacpy(&hcon->dst, &iso_pi(parent)->dst); - hcon->dst_type = iso_pi(parent)->dst_type; + hcon->dst_type = le_addr_type(iso_pi(parent)->dst_type); } if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { From 77343b8b4f87560f8f03e77b98a81ff3a147b262 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 30 Sep 2025 13:39:33 +0800 Subject: [PATCH 0389/1075] Bluetooth: btmtksdio: Add pmctrl handling for BT closed state during reset This patch adds logic to handle power management control when the Bluetooth function is closed during the SDIO reset sequence. Specifically, if BT is closed before reset, the driver enables the SDIO function and sets driver pmctrl. After reset, if BT remains closed, the driver sets firmware pmctrl and disables the SDIO function. These changes ensure proper power management and device state consistency across the reset flow. Fixes: 8fafe702253d ("Bluetooth: mt7921s: support bluetooth reset mechanism") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 50abefba6d04..62db31bd6592 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1270,6 +1270,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) sdio_claim_host(bdev->func); + /* set drv_pmctrl if BT is closed before doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + sdio_enable_func(bdev->func); + btmtksdio_drv_pmctrl(bdev); + } + sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, NULL); skb_queue_purge(&bdev->txq); cancel_work_sync(&bdev->txrx_work); @@ -1285,6 +1291,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) goto err; } + /* set fw_pmctrl back if BT is closed after doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + btmtksdio_fw_pmctrl(bdev); + sdio_disable_func(bdev->func); + } + clear_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); err: sdio_release_host(bdev->func); From 0d92808024b4e9868cef68d16f121d509843e80e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Oct 2025 10:55:58 -0400 Subject: [PATCH 0390/1075] Bluetooth: HCI: Fix tracking of advertisement set/instance 0x00 This fixes the state tracking of advertisement set/instance 0x00 which is considered a legacy instance and is not tracked individually by adv_instances list, previously it was assumed that hci_dev itself would track it via HCI_LE_ADV but that is a global state not specifc to instance 0x00, so to fix it a new flag is introduced that only tracks the state of instance 0x00. Fixes: 1488af7b8b5f ("Bluetooth: hci_sync: Fix hci_resume_advertising_sync") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/hci_sync.c | 5 ++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9ecc70baaca9..8d0e703bc929 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -434,6 +434,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_ADV_0, HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d790b0d4eb9a..1dabf5a7ae18 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1609,6 +1609,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (adv && !adv->periodic) adv->enabled = true; + else if (!set->handle) + hci_dev_set_flag(hdev, HCI_LE_ADV_0); conn = hci_lookup_le_connect(hdev); if (conn) @@ -1619,6 +1621,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (cp->num_of_sets) { if (adv) adv->enabled = false; + else if (!set->handle) + hci_dev_clear_flag(hdev, HCI_LE_ADV_0); /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_ADV diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d160e5e1fe8a..28ad08cd7d70 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2606,9 +2606,8 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) /* If current advertising instance is set to instance 0x00 * then we need to re-enable it. */ - if (!hdev->cur_adv_instance) - err = hci_enable_ext_advertising_sync(hdev, - hdev->cur_adv_instance); + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_ADV_0)) + err = hci_enable_ext_advertising_sync(hdev, 0x00); } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop From e8785404de06a69d89dcdd1e9a0b6ea42dc6d327 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Fri, 3 Oct 2025 22:07:32 +0300 Subject: [PATCH 0391/1075] Bluetooth: MGMT: fix crash in set_mesh_sync and set_mesh_complete There is a BUG: KASAN: stack-out-of-bounds in set_mesh_sync due to memcpy from badly declared on-stack flexible array. Another crash is in set_mesh_complete() due to double list_del via mgmt_pending_valid + mgmt_pending_remove. Use DEFINE_FLEX to declare the flexible array right, and don't memcpy outside bounds. As mgmt_pending_valid removes the cmd from list, use mgmt_pending_free, and also report status on error. Fixes: 302a1f674c00d ("Bluetooth: MGMT: Fix possible UAFs") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 74edea06985b..bca0333f1e99 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh { __le16 window; __le16 period; __u8 num_ad_types; - __u8 ad_types[]; + __u8 ad_types[] __counted_by(num_ad_types); } __packed; #define MGMT_SET_MESH_RECEIVER_SIZE 6 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a3d16eece0d2..24e335e3a727 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2175,19 +2175,24 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + status); mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); - return; + goto done; } - mgmt_pending_remove(cmd); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); + +done: + mgmt_pending_free(cmd); } static int set_mesh_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_mesh cp; + DEFINE_FLEX(struct mgmt_cp_set_mesh, cp, ad_types, num_ad_types, + sizeof(hdev->mesh_ad_types)); size_t len; mutex_lock(&hdev->mgmt_pending_lock); @@ -2197,27 +2202,26 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) return -ECANCELED; } - memcpy(&cp, cmd->param, sizeof(cp)); + len = cmd->param_len; + memcpy(cp, cmd->param, min(__struct_size(cp), len)); mutex_unlock(&hdev->mgmt_pending_lock); - len = cmd->param_len; - memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); - if (cp.enable) + if (cp->enable) hci_dev_set_flag(hdev, HCI_MESH); else hci_dev_clear_flag(hdev, HCI_MESH); - hdev->le_scan_interval = __le16_to_cpu(cp.period); - hdev->le_scan_window = __le16_to_cpu(cp.window); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); - len -= sizeof(cp); + len -= sizeof(struct mgmt_cp_set_mesh); /* If filters don't fit, forward all adv pkts */ if (len <= sizeof(hdev->mesh_ad_types)) - memcpy(hdev->mesh_ad_types, cp.ad_types, len); + memcpy(hdev->mesh_ad_types, cp->ad_types, len); hci_update_passive_scan_sync(hdev); return 0; From 76e20da0bd00c556ed0a1e7250bdb6ac3e808ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 6 Oct 2025 10:35:44 +0200 Subject: [PATCH 0392/1075] Revert "Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c9d84da18d1e0d28a7e16ca6df8e6d47570501d4. It replaces in L2CAP calls to msecs_to_jiffies() to secs_to_jiffies() and updates the constants accordingly. But the constants are also used in LCAP Configure Request and L2CAP Configure Response which expect values in milliseconds. This may prevent correct usage of L2CAP channel. To fix it, keep those constants in milliseconds and so revert this change. Fixes: c9d84da18d1e ("Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ++-- net/bluetooth/l2cap_core.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 4bb0eaedda18..00e182a22720 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -38,8 +38,8 @@ #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */ -#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 805c752ac0a9..d08320380ad6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -282,7 +282,7 @@ static void __set_retrans_timer(struct l2cap_chan *chan) if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, - secs_to_jiffies(chan->retrans_timeout)); + msecs_to_jiffies(chan->retrans_timeout)); } } @@ -291,7 +291,7 @@ static void __set_monitor_timer(struct l2cap_chan *chan) __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, - secs_to_jiffies(chan->monitor_timeout)); + msecs_to_jiffies(chan->monitor_timeout)); } } From c403da5e98b04a2aec9cfb25cbeeb28d7ce29975 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 7 Oct 2025 13:29:15 -0400 Subject: [PATCH 0393/1075] Bluetooth: ISO: Fix another instance of dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 954e1916506b..3d98cb6291da 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2046,7 +2046,13 @@ static void iso_conn_ready(struct iso_conn *conn) } bacpy(&iso_pi(sk)->dst, &hcon->dst); - iso_pi(sk)->dst_type = hcon->dst_type; + + /* Convert from HCI to three-value type */ + if (hcon->dst_type == ADDR_LE_DEV_PUBLIC) + iso_pi(sk)->dst_type = BDADDR_LE_PUBLIC; + else + iso_pi(sk)->dst_type = BDADDR_LE_RANDOM; + iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle; memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len); iso_pi(sk)->base_len = iso_pi(parent)->base_len; From 057b6ca5961203f16a2a02fb0592661a7a959a84 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 16 Oct 2025 10:00:43 +0530 Subject: [PATCH 0394/1075] Bluetooth: btintel_pcie: Fix event packet loss issue In the current btintel_pcie driver implementation, when an interrupt is received, the driver checks for the alive cause before the TX/RX cause. Handling the alive cause involves resetting the TX/RX queue indices. This flow works correctly when the causes are mutually exclusive. However, if both cause bits are set simultaneously, the alive cause resets the queue indices, resulting in an event packet drop and a command timeout. To fix this issue, the driver is modified to handle all other causes before checking for the alive cause. Test case: Issue is seen with stress reboot scenario - 50x run [20.337589] Bluetooth: hci0: Device revision is 0 [20.346750] Bluetooth: hci0: Secure boot is enabled [20.346752] Bluetooth: hci0: OTP lock is disabled [20.346752] Bluetooth: hci0: API lock is enabled [20.346752] Bluetooth: hci0: Debug lock is disabled [20.346753] Bluetooth: hci0: Minimum firmware build 1 week 10 2014 [20.346754] Bluetooth: hci0: Bootloader timestamp 2023.43 buildtype 1 build 11631 [20.359070] Bluetooth: hci0: Found device firmware: intel/ibt-00a0-00a1-iml.sfi [20.371499] Bluetooth: hci0: Boot Address: 0xb02ff800 [20.385769] Bluetooth: hci0: Firmware Version: 166-34.25 [20.538257] Bluetooth: hci0: Waiting for firmware download to complete [20.554424] Bluetooth: hci0: Firmware loaded in 178651 usecs [21.081588] Bluetooth: hci0: Timeout (500 ms) on tx completion [21.096541] Bluetooth: hci0: Failed to send frame (-62) [21.110240] Bluetooth: hci0: sending frame failed (-62) [21.138551] Bluetooth: hci0: Failed to send Intel Reset command [21.170153] Bluetooth: hci0: Intel Soft Reset failed (-62) Signed-off-by: Kiran K Signed-off-by: Sai Teja Aluvala Reviewed-by: Paul Menzel Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 6d3963bd56a9..a075d8ec4677 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1467,11 +1467,6 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP1) btintel_pcie_msix_gp1_handler(data); - /* This interrupt is triggered by the firmware after updating - * boot_stage register and image_response register - */ - if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) - btintel_pcie_msix_gp0_handler(data); /* For TX */ if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { @@ -1487,6 +1482,12 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_tx_handle(data); } + /* This interrupt is triggered by the firmware after updating + * boot_stage register and image_response register + */ + if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) + btintel_pcie_msix_gp0_handler(data); + /* * Before sending the interrupt the HW disables it to prevent a nested * interrupt. This is done by writing 1 to the corresponding bit in From b489556a856d31f1eb73972150f371d2e4ce1de8 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 23 Oct 2025 11:47:19 -0700 Subject: [PATCH 0395/1075] Bluetooth: fix corruption in h4_recv_buf() after cleanup A different structure is stored in drvdata for the drivers which used that duplicate function, but h4_recv_buf() assumes drvdata is always an hci_uart structure. Consequently, alignment and padding are now randomly corrupted for btmtkuart, btnxpuart, and bpa10x in h4_recv_buf(), causing erratic breakage. Fix this by making the hci_uart structure the explicit argument to h4_recv_buf(). Every caller already has a reference to hci_uart, and already obtains the hci_hdev reference through it, so this actually eliminates a redundant pointer indirection for all existing callers. Fixes: 93f06f8f0daf ("Bluetooth: remove duplicate h4_recv_buf() in header") Reported-by: Francesco Valla Closes: https://lore.kernel.org/lkml/6837167.ZASKD2KPVS@fedora.fritz.box/ Signed-off-by: Calvin Owens Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/bpa10x.c | 4 +++- drivers/bluetooth/btmtkuart.c | 4 +++- drivers/bluetooth/btnxpuart.c | 4 +++- drivers/bluetooth/hci_ag6xx.c | 2 +- drivers/bluetooth/hci_aml.c | 2 +- drivers/bluetooth/hci_ath.c | 2 +- drivers/bluetooth/hci_bcm.c | 2 +- drivers/bluetooth/hci_h4.c | 6 +++--- drivers/bluetooth/hci_intel.c | 2 +- drivers/bluetooth/hci_ll.c | 2 +- drivers/bluetooth/hci_mrvl.c | 6 +++--- drivers/bluetooth/hci_nokia.c | 4 ++-- drivers/bluetooth/hci_qca.c | 2 +- drivers/bluetooth/hci_uart.h | 2 +- 14 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index b7ba667a3d09..e305d04aac9d 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -41,6 +41,7 @@ struct bpa10x_data { struct usb_anchor rx_anchor; struct sk_buff *rx_skb[2]; + struct hci_uart hu; }; static void bpa10x_tx_complete(struct urb *urb) @@ -96,7 +97,7 @@ static void bpa10x_rx_complete(struct urb *urb) if (urb->status == 0) { bool idx = usb_pipebulk(urb->pipe); - data->rx_skb[idx] = h4_recv_buf(hdev, data->rx_skb[idx], + data->rx_skb[idx] = h4_recv_buf(&data->hu, data->rx_skb[idx], urb->transfer_buffer, urb->actual_length, bpa10x_recv_pkts, @@ -388,6 +389,7 @@ static int bpa10x_probe(struct usb_interface *intf, hci_set_drvdata(hdev, data); data->hdev = hdev; + data->hu.hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index d9b90ea2ad38..27aa48ff3ac2 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -79,6 +79,7 @@ struct btmtkuart_dev { u16 stp_dlen; const struct btmtkuart_data *data; + struct hci_uart hu; }; #define btmtkuart_is_standalone(bdev) \ @@ -368,7 +369,7 @@ static void btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) sz_left -= adv; p_left += adv; - bdev->rx_skb = h4_recv_buf(bdev->hdev, bdev->rx_skb, p_h4, + bdev->rx_skb = h4_recv_buf(&bdev->hu, bdev->rx_skb, p_h4, sz_h4, mtk_recv_pkts, ARRAY_SIZE(mtk_recv_pkts)); if (IS_ERR(bdev->rx_skb)) { @@ -858,6 +859,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) } bdev->hdev = hdev; + bdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, bdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index d5153fed0518..3b1e9224e965 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -212,6 +212,7 @@ struct btnxpuart_dev { struct ps_data psdata; struct btnxpuart_data *nxp_data; struct reset_control *pdn; + struct hci_uart hu; }; #define NXP_V1_FW_REQ_PKT 0xa5 @@ -1756,7 +1757,7 @@ static size_t btnxpuart_receive_buf(struct serdev_device *serdev, ps_start_timer(nxpdev); - nxpdev->rx_skb = h4_recv_buf(nxpdev->hdev, nxpdev->rx_skb, data, count, + nxpdev->rx_skb = h4_recv_buf(&nxpdev->hu, nxpdev->rx_skb, data, count, nxp_recv_pkts, ARRAY_SIZE(nxp_recv_pkts)); if (IS_ERR(nxpdev->rx_skb)) { int err = PTR_ERR(nxpdev->rx_skb); @@ -1875,6 +1876,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) reset_control_deassert(nxpdev->pdn); nxpdev->hdev = hdev; + nxpdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, nxpdev); diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 2d40302409ff..94588676510f 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -105,7 +105,7 @@ static int ag6xx_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ag6xx->rx_skb = h4_recv_buf(hu->hdev, ag6xx->rx_skb, data, count, + ag6xx->rx_skb = h4_recv_buf(hu, ag6xx->rx_skb, data, count, ag6xx_recv_pkts, ARRAY_SIZE(ag6xx_recv_pkts)); if (IS_ERR(ag6xx->rx_skb)) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 707e90f80130..b1f32c5a8a3f 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -650,7 +650,7 @@ static int aml_recv(struct hci_uart *hu, const void *data, int count) struct aml_data *aml_data = hu->priv; int err; - aml_data->rx_skb = h4_recv_buf(hu->hdev, aml_data->rx_skb, data, count, + aml_data->rx_skb = h4_recv_buf(hu, aml_data->rx_skb, data, count, aml_recv_pkts, ARRAY_SIZE(aml_recv_pkts)); if (IS_ERR(aml_data->rx_skb)) { diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index dbfe34664633..8d2b5e7f0d6a 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,7 +191,7 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; - ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count, + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { int err = PTR_ERR(ath->rx_skb); diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index f96617b85d87..fff845ed44e3 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -698,7 +698,7 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - bcm->rx_skb = h4_recv_buf(hu->hdev, bcm->rx_skb, data, count, + bcm->rx_skb = h4_recv_buf(hu, bcm->rx_skb, data, count, bcm_recv_pkts, ARRAY_SIZE(bcm_recv_pkts)); if (IS_ERR(bcm->rx_skb)) { int err = PTR_ERR(bcm->rx_skb); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 9070e31a68bf..ec017df8572c 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -112,7 +112,7 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu->hdev, h4->rx_skb, data, count, + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { int err = PTR_ERR(h4->rx_skb); @@ -151,12 +151,12 @@ int __exit h4_deinit(void) return hci_uart_unregister_proto(&h4p); } -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count) { - struct hci_uart *hu = hci_get_drvdata(hdev); u8 alignment = hu->alignment ? hu->alignment : 1; + struct hci_dev *hdev = hu->hdev; /* Check for error from previous call */ if (IS_ERR(skb)) diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 9b353c3d6442..1d6e09508f1f 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -972,7 +972,7 @@ static int intel_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - intel->rx_skb = h4_recv_buf(hu->hdev, intel->rx_skb, data, count, + intel->rx_skb = h4_recv_buf(hu, intel->rx_skb, data, count, intel_recv_pkts, ARRAY_SIZE(intel_recv_pkts)); if (IS_ERR(intel->rx_skb)) { diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 7044c86325ce..6f4e25917b86 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -429,7 +429,7 @@ static int ll_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count, + ll->rx_skb = h4_recv_buf(hu, ll->rx_skb, data, count, ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts)); if (IS_ERR(ll->rx_skb)) { int err = PTR_ERR(ll->rx_skb); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index e08222395772..8767522ec4c6 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -264,9 +264,9 @@ static int mrvl_recv(struct hci_uart *hu, const void *data, int count) !test_bit(STATE_FW_LOADED, &mrvl->flags)) return count; - mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, - mrvl_recv_pkts, - ARRAY_SIZE(mrvl_recv_pkts)); + mrvl->rx_skb = h4_recv_buf(hu, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); if (IS_ERR(mrvl->rx_skb)) { int err = PTR_ERR(mrvl->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index cd7575c20f65..1e65b541f8ad 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -624,8 +624,8 @@ static int nokia_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - btdev->rx_skb = h4_recv_buf(hu->hdev, btdev->rx_skb, data, count, - nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); + btdev->rx_skb = h4_recv_buf(hu, btdev->rx_skb, data, count, + nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); if (IS_ERR(btdev->rx_skb)) { err = PTR_ERR(btdev->rx_skb); dev_err(dev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4cff4d9be313..888176b0faa9 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1277,7 +1277,7 @@ static int qca_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - qca->rx_skb = h4_recv_buf(hu->hdev, qca->rx_skb, data, count, + qca->rx_skb = h4_recv_buf(hu, qca->rx_skb, data, count, qca_recv_pkts, ARRAY_SIZE(qca_recv_pkts)); if (IS_ERR(qca->rx_skb)) { int err = PTR_ERR(qca->rx_skb); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index cbbe79b241ce..48ac7ca9334e 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -162,7 +162,7 @@ struct h4_recv_pkt { int h4_init(void); int h4_deinit(void); -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count); #endif From 857eb0fabc389be5159e0e17d84bc122614b5b98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:29:41 -0400 Subject: [PATCH 0396/1075] Bluetooth: hci_conn: Fix connection cleanup with BIG with 2 or more BIS This fixes bis_cleanup not considering connections in BT_OPEN state before attempting to remove the BIG causing the following error: btproxy[20110]: < HCI Command: LE Terminate Broadcast Isochronous Group (0x08|0x006a) plen 2 BIG Handle: 0x01 Reason: Connection Terminated By Local Host (0x16) > HCI Event: Command Status (0x0f) plen 4 LE Terminate Broadcast Isochronous Group (0x08|0x006a) ncmd 1 Status: Unknown Advertising Identifier (0x42) Fixes: fa224d0c094a ("Bluetooth: ISO: Reassociate a socket with an active BIS") Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel --- net/bluetooth/hci_conn.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 111f0e37b672..c5dedf39a129 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -843,6 +843,13 @@ static void bis_cleanup(struct hci_conn *conn) if (bis) return; + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_OPEN, + HCI_ROLE_MASTER); + if (bis) + return; + hci_le_terminate_big(hdev, conn); } else { hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, From 751463ceefc3397566d03c8b64ef4a77f5fd88ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:03:19 -0400 Subject: [PATCH 0397/1075] Bluetooth: hci_core: Fix tracking of periodic advertisement Periodic advertising enabled flag cannot be tracked by the enabled flag since advertising and periodic advertising each can be enabled/disabled separately from one another causing the states to be inconsistent when for example an advertising set is disabled its enabled flag is set to false which is then used for periodic which has not being disabled. Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 7 +++++-- net/bluetooth/hci_sync.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2924c2bf2a98..b8100dbfe5d7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + bool periodic_enabled; __u8 mesh; __u8 instance; __u8 handle; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1dabf5a7ae18..d37db364acf7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1607,7 +1607,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_ADV); - if (adv && !adv->periodic) + if (adv) adv->enabled = true; else if (!set->handle) hci_dev_set_flag(hdev, HCI_LE_ADV_0); @@ -3963,8 +3963,11 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_PER_ADV); if (adv) - adv->enabled = true; + adv->periodic_enabled = true; } else { + if (adv) + adv->periodic_enabled = false; + /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_PER_ADV. * The current periodic adv instance will be marked as diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 28ad08cd7d70..73fc41b68b68 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1607,7 +1607,7 @@ int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already disabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->periodic || !adv->enabled) + if (!adv || !adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); @@ -1672,7 +1672,7 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (adv && adv->periodic && adv->enabled) + if (adv && adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); From 91d35ec9b3956d6b3cf789c1593467e58855b03a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 23 Oct 2025 14:05:30 +0200 Subject: [PATCH 0398/1075] Bluetooth: rfcomm: fix modem control handling The RFCOMM driver confuses the local and remote modem control signals, which specifically means that the reported DTR and RTS state will instead reflect the remote end (i.e. DSR and CTS). This issue dates back to the original driver (and a follow-on update) merged in 2002, which resulted in a non-standard implementation of TIOCMSET that allowed controlling also the TS07.10 IC and DV signals by mapping them to the RI and DCD input flags, while TIOCMGET failed to return the actual state of DTR and RTS. Note that the bogus control of input signals in tiocmset() is just dead code as those flags will have been masked out by the tty layer since 2003. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/rfcomm/tty.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 376ce6de84be..b783526ab588 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -643,8 +643,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) tty_port_tty_hangup(&dev->port, true); dev->modem_status = - ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | - ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | + ((v24_sig & RFCOMM_V24_RTC) ? TIOCM_DSR : 0) | + ((v24_sig & RFCOMM_V24_RTR) ? TIOCM_CTS : 0) | ((v24_sig & RFCOMM_V24_IC) ? TIOCM_RI : 0) | ((v24_sig & RFCOMM_V24_DV) ? TIOCM_CD : 0); } @@ -1055,10 +1055,14 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) static int rfcomm_tty_tiocmget(struct tty_struct *tty) { struct rfcomm_dev *dev = tty->driver_data; + struct rfcomm_dlc *dlc = dev->dlc; + u8 v24_sig; BT_DBG("tty %p dev %p", tty, dev); - return dev->modem_status; + rfcomm_dlc_get_modem_status(dlc, &v24_sig); + + return (v24_sig & (TIOCM_DTR | TIOCM_RTS)) | dev->modem_status; } static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) @@ -1071,23 +1075,15 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigne rfcomm_dlc_get_modem_status(dlc, &v24_sig); - if (set & TIOCM_DSR || set & TIOCM_DTR) + if (set & TIOCM_DTR) v24_sig |= RFCOMM_V24_RTC; - if (set & TIOCM_RTS || set & TIOCM_CTS) + if (set & TIOCM_RTS) v24_sig |= RFCOMM_V24_RTR; - if (set & TIOCM_RI) - v24_sig |= RFCOMM_V24_IC; - if (set & TIOCM_CD) - v24_sig |= RFCOMM_V24_DV; - if (clear & TIOCM_DSR || clear & TIOCM_DTR) + if (clear & TIOCM_DTR) v24_sig &= ~RFCOMM_V24_RTC; - if (clear & TIOCM_RTS || clear & TIOCM_CTS) + if (clear & TIOCM_RTS) v24_sig &= ~RFCOMM_V24_RTR; - if (clear & TIOCM_RI) - v24_sig &= ~RFCOMM_V24_IC; - if (clear & TIOCM_CD) - v24_sig &= ~RFCOMM_V24_DV; rfcomm_dlc_set_modem_status(dlc, v24_sig); From 8c5fa3764facaad6d38336e90f406c2c11d69733 Mon Sep 17 00:00:00 2001 From: Fangyu Yu Date: Tue, 21 Oct 2025 22:21:31 +0800 Subject: [PATCH 0399/1075] RISC-V: KVM: Remove automatic I/O mapping for VM_PFNMAP As of commit aac6db75a9fc ("vfio/pci: Use unmap_mapping_range()"), vm_pgoff may no longer guaranteed to hold the PFN for VM_PFNMAP regions. Using vma->vm_pgoff to derive the HPA here may therefore produce incorrect mappings. Instead, I/O mappings for such regions can be established on-demand during g-stage page faults, making the upfront ioremap in this path is unnecessary. Fixes: aac6db75a9fc ("vfio/pci: Use unmap_mapping_range()") Signed-off-by: Fangyu Yu Tested-by: Daniel Henrique Barboza Reviewed-by: Guo Ren Reviewed-by: Anup Patel Link: https://lore.kernel.org/r/20251021142131.78796-1-fangyu.yu@linux.alibaba.com Signed-off-by: Anup Patel --- arch/riscv/kvm/mmu.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 525fb5a330c0..58f5f3536ffd 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -171,7 +171,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, enum kvm_mr_change change) { hva_t hva, reg_end, size; - gpa_t base_gpa; bool writable; int ret = 0; @@ -190,15 +189,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, hva = new->userspace_addr; size = new->npages << PAGE_SHIFT; reg_end = hva + size; - base_gpa = new->base_gfn << PAGE_SHIFT; writable = !(new->flags & KVM_MEM_READONLY); mmap_read_lock(current->mm); /* * A memory region could potentially cover multiple VMAs, and - * any holes between them, so iterate over all of them to find - * out if we can map any of them right now. + * any holes between them, so iterate over all of them. * * +--------------------------------------------+ * +---------------+----------------+ +----------------+ @@ -209,7 +206,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, */ do { struct vm_area_struct *vma; - hva_t vm_start, vm_end; + hva_t vm_end; vma = find_vma_intersection(current->mm, hva, reg_end); if (!vma) @@ -225,36 +222,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, } /* Take the intersection of this VMA with the memory region */ - vm_start = max(hva, vma->vm_start); vm_end = min(reg_end, vma->vm_end); if (vma->vm_flags & VM_PFNMAP) { - gpa_t gpa = base_gpa + (vm_start - hva); - phys_addr_t pa; - - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; - pa += vm_start - vma->vm_start; - /* IO region dirty page logging not allowed */ if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { ret = -EINVAL; goto out; } - - ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start, - writable, false); - if (ret) - break; } hva = vm_end; } while (hva < reg_end); - if (change == KVM_MR_FLAGS_ONLY) - goto out; - - if (ret) - kvm_riscv_mmu_iounmap(kvm, base_gpa, size); - out: mmap_read_unlock(current->mm); return ret; From 033559473dd3b55558b535aa37b8848c207b5cbb Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Tue, 21 Oct 2025 17:09:51 +0100 Subject: [PATCH 0400/1075] dma-fence: Fix safe access wrapper to call timeline name method This commit fixes the wrapper function dma_fence_timeline_name(), that was added for safe access, to actually call the timeline name method of dma_fence_ops. Cc: # v6.17+ Signed-off-by: Akash Goel Fixes: 506aa8b02a8d ("dma-fence: Add safe access helpers and document the rules") Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://lore.kernel.org/r/20251021160951.1415603-1-akash.goel@arm.com --- drivers/dma-buf/dma-fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 3f78c56b58dc..39e6f93dc310 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -1141,7 +1141,7 @@ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) "RCU protection is required for safe access to returned string"); if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) - return fence->ops->get_driver_name(fence); + return fence->ops->get_timeline_name(fence); else return "signaled-timeline"; } From 84dfce65a7ae7b11c7b13285a1b23e9a94ad37b7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Oct 2025 14:59:59 +0200 Subject: [PATCH 0401/1075] x86/bugs: Remove dead code which might prevent from building Clang, in particular, is not happy about dead code: arch/x86/kernel/cpu/bugs.c:1830:20: error: unused function 'match_option' [-Werror,-Wunused-function] 1830 | static inline bool match_option(const char *arg, int arglen, const char *opt) | ^~~~~~~~~~~~ 1 error generated. Remove a leftover from the previous cleanup. Fixes: 02ac6cc8c5a1 ("x86/bugs: Simplify SSB cmdline parsing") Signed-off-by: Andy Shevchenko Signed-off-by: Dave Hansen Link: https://patch.msgid.link/20251024125959.1526277-1-andriy.shevchenko%40linux.intel.com --- arch/x86/kernel/cpu/bugs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e08de5b0d20b..d7fa03bf51b4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1827,13 +1827,6 @@ void unpriv_ebpf_notify(int new_state) } #endif -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - /* The kernel command line selection for spectre v2 */ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_NONE, From 8ce93aabbf75171470e3d1be56bf1a6937dc5db8 Mon Sep 17 00:00:00 2001 From: Malin Jonsson Date: Fri, 24 Oct 2025 17:14:36 +0200 Subject: [PATCH 0402/1075] bpf: Conditionally include dynptr copy kfuncs Since commit a498ee7576de ("bpf: Implement dynptr copy kfuncs"), if CONFIG_BPF_EVENTS is not enabled, but BPF_SYSCALL and DEBUG_INFO_BTF are, the build will break like so: BTFIDS vmlinux.unstripped WARN: resolve_btfids: unresolved symbol bpf_probe_read_user_str_dynptr WARN: resolve_btfids: unresolved symbol bpf_probe_read_user_dynptr WARN: resolve_btfids: unresolved symbol bpf_probe_read_kernel_str_dynptr WARN: resolve_btfids: unresolved symbol bpf_probe_read_kernel_dynptr WARN: resolve_btfids: unresolved symbol bpf_copy_from_user_task_str_dynptr WARN: resolve_btfids: unresolved symbol bpf_copy_from_user_task_dynptr WARN: resolve_btfids: unresolved symbol bpf_copy_from_user_str_dynptr WARN: resolve_btfids: unresolved symbol bpf_copy_from_user_dynptr make[2]: *** [scripts/Makefile.vmlinux:72: vmlinux.unstripped] Error 255 make[2]: *** Deleting file 'vmlinux.unstripped' make[1]: *** [/repo/malin/upstream/linux/Makefile:1242: vmlinux] Error 2 make: *** [Makefile:248: __sub-make] Error 2 Guard these symbols with #ifdef CONFIG_BPF_EVENTS to resolve the problem. Fixes: a498ee7576de ("bpf: Implement dynptr copy kfuncs") Reported-by: Yong Gu Acked-by: Mykyta Yatsenko Signed-off-by: Malin Jonsson Link: https://lore.kernel.org/r/20251024151436.139131-1-malin.jonsson@est.tech Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 8eb117c52817..eb25e70e0bdc 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4345,6 +4345,7 @@ BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLE BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_local_irq_save) BTF_ID_FLAGS(func, bpf_local_irq_restore) +#ifdef CONFIG_BPF_EVENTS BTF_ID_FLAGS(func, bpf_probe_read_user_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_kernel_dynptr) BTF_ID_FLAGS(func, bpf_probe_read_user_str_dynptr) @@ -4353,6 +4354,7 @@ BTF_ID_FLAGS(func, bpf_copy_from_user_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_str_dynptr, KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_copy_from_user_task_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_copy_from_user_task_str_dynptr, KF_SLEEPABLE | KF_TRUSTED_ARGS) +#endif #ifdef CONFIG_DMA_SHARED_BUFFER BTF_ID_FLAGS(func, bpf_iter_dmabuf_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_dmabuf_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) From b2a578f3127ab9ef80114cef9b20a2b42a8ee77a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Oct 2025 16:08:24 +0200 Subject: [PATCH 0403/1075] soc: officially expand maintainership team Since Olof moved on from the soc tree maintenance, Arnd has mainly taken care of the day-to-day activities around the SoC tree by himself, which is generally not a good setup. Krzysztof, Linus and Alexandre have volunteered to become co-maintainers of the SoC tree, with the plan of taking turns to do merges and reviews to spread the workload. In addition, Drew joins as another reviewer. Acked-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Linus Walleij Acked-by: Drew Fustini Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..bb627c2fb438 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1997,6 +1997,10 @@ F: include/uapi/linux/if_arcnet.h ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS) M: Arnd Bergmann +M: Krzysztof Kozlowski +M: Alexandre Belloni +M: Linus Walleij +R: Drew Fustini L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: soc@lists.linux.dev S: Maintained From 73ba88fb04081372a69f0395958ac6b65d53d134 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Sat, 25 Oct 2025 02:02:19 +0530 Subject: [PATCH 0404/1075] firewire: init_ohci1394_dma: add missing function parameter documentation Add missing kernel-doc parameter descriptions for five functions in init_ohci1394_dma.c to fix documentation warnings when building with W=1. This patch addresses the following warnings: - init_ohci1394_wait_for_busresets: missing @ohci description - init_ohci1394_enable_physical_dma: missing @ohci description - init_ohci1394_reset_and_init_dma: missing @ohci description - init_ohci1394_controller: missing @num, @slot, @func descriptions - setup_ohci1394_dma: missing @opt description Tested with GCC 13.2.0 and W=1 flag. All documentation warnings for these functions have been resolved. Signed-off-by: Nirbhay Sharma Link: https://lore.kernel.org/r/20251024203219.101990-2-nirbhay.lkd@gmail.com Signed-off-by: Takashi Sakamoto --- drivers/firewire/init_ohci1394_dma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/firewire/init_ohci1394_dma.c b/drivers/firewire/init_ohci1394_dma.c index 48b879e9e831..121f0c2f6401 100644 --- a/drivers/firewire/init_ohci1394_dma.c +++ b/drivers/firewire/init_ohci1394_dma.c @@ -167,6 +167,7 @@ static inline void __init init_ohci1394_initialize(struct ohci *ohci) /** * init_ohci1394_wait_for_busresets - wait until bus resets are completed + * @ohci: Pointer to the OHCI-1394 controller structure * * OHCI1394 initialization itself and any device going on- or offline * and any cable issue cause a IEEE1394 bus reset. The OHCI1394 spec @@ -189,6 +190,8 @@ static inline void __init init_ohci1394_wait_for_busresets(struct ohci *ohci) /** * init_ohci1394_enable_physical_dma - Enable physical DMA for remote debugging + * @ohci: Pointer to the OHCI-1394 controller structure + * * This enables remote DMA access over IEEE1394 from every host for the low * 4GB of address space. DMA accesses above 4GB are not available currently. */ @@ -201,6 +204,8 @@ static inline void __init init_ohci1394_enable_physical_dma(struct ohci *ohci) /** * init_ohci1394_reset_and_init_dma - init controller and enable DMA + * @ohci: Pointer to the OHCI-1394 controller structure + * * This initializes the given controller and enables physical DMA engine in it. */ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) @@ -230,6 +235,10 @@ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) /** * init_ohci1394_controller - Map the registers of the controller and init DMA + * @num: PCI bus number + * @slot: PCI device number + * @func: PCI function number + * * This maps the registers of the specified controller and initializes it */ static inline void __init init_ohci1394_controller(int num, int slot, int func) @@ -284,6 +293,7 @@ void __init init_ohci1394_dma_on_all_controllers(void) /** * setup_ohci1394_dma - enables early OHCI1394 DMA initialization + * @opt: Kernel command line parameter string */ static int __init setup_ohci1394_dma(char *opt) { From 53abe3e1c154628cc74e33a1bfcd865656e433a5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Oct 2025 11:19:34 +0200 Subject: [PATCH 0405/1075] sched: Remove never used code in mm_cid_get() Clang is not happy with set but unused variable (this is visible with `make W=1` build: kernel/sched/sched.h:3744:18: error: variable 'cpumask' set but not used [-Werror,-Wunused-but-set-variable] It seems like the variable was never used along with the assignment that does not have side effects as far as I can see. Remove those altogether. Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Signed-off-by: Andy Shevchenko Tested-by: Eric Biggers Reviewed-by: Breno Leitao Signed-off-by: Linus Torvalds --- kernel/sched/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5d07067f60..361f9101cef9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3740,11 +3740,9 @@ static inline int mm_cid_get(struct rq *rq, struct task_struct *t, struct mm_struct *mm) { struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; - struct cpumask *cpumask; int cid; lockdep_assert_rq_held(rq); - cpumask = mm_cidmask(mm); cid = __this_cpu_read(pcpu_cid->cid); if (mm_cid_is_valid(cid)) { mm_cid_snapshot_time(rq, mm); From 84a905290cb4c3d9a71a9e3b2f2e02e031e7512f Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Thu, 23 Oct 2025 16:48:53 +0200 Subject: [PATCH 0406/1075] net: phy: dp83867: Disable EEE support as not implemented While the DP83867 PHYs report EEE capability through their feature registers, the actual hardware does not support EEE (see Links). When the connected MAC enables EEE, it causes link instability and communication failures. The issue is reproducible with a iMX8MP and relevant stmmac ethernet port. Since the introduction of phylink-managed EEE support in the stmmac driver, EEE is now enabled by default, leading to issues on systems using the DP83867 PHY. Call phy_disable_eee during phy initialization to prevent EEE from being enabled on DP83867 PHYs. Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/1445244/dp83867ir-dp83867-disable-eee-lpi Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/658638/dp83867ir-eee-energy-efficient-ethernet Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") Cc: stable@vger.kernel.org Signed-off-by: Emanuele Ghidoli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023144857.529566-1-ghidoliemanuele@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index deeefb962566..36a0c1b7f59c 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -738,6 +738,12 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } + /* Although the DP83867 reports EEE capability through the + * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature + * is not actually implemented in hardware. + */ + phy_disable_eee(phydev); + if (phy_interface_is_rgmii(phydev) || phydev->interface == PHY_INTERFACE_MODE_SGMII) { val = phy_read(phydev, MII_DP83867_PHYCTRL); From 73b7e48a87c79fd6df40aeafcd3994287e31e25f Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 22 Oct 2025 15:20:51 -0700 Subject: [PATCH 0407/1075] drm/msm: Reject MAP_NULL op if no PRR We need PRR support in order to implement MAP_NULL. Userspace shouldn't be trying to use this if it is unsupported. Reported-by: Valentine Burley Link: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37935#note_3153730 Signed-off-by: Rob Clark Tested-by: Valentine Burley Patchwork: https://patchwork.freedesktop.org/patch/682941/ Message-ID: <20251022222051.10030-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 7 ------- drivers/gpu/drm/msm/msm_gem_vma.c | 6 ++++++ drivers/gpu/drm/msm/msm_gpu.h | 11 +++++++++++ 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index afaa3cfefd35..4b5a4edd0702 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -348,13 +348,6 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, return 0; } -static bool -adreno_smmu_has_prr(struct msm_gpu *gpu) -{ - struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); - return adreno_smmu && adreno_smmu->set_prr_addr; -} - int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, uint32_t param, uint64_t *value, uint32_t *len) { diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 381a0853c05b..0d219454f0e6 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -964,6 +964,7 @@ static int lookup_op(struct msm_vm_bind_job *job, const struct drm_msm_vm_bind_op *op) { struct drm_device *dev = job->vm->drm; + struct msm_drm_private *priv = dev->dev_private; int i = job->nr_ops++; int ret = 0; @@ -1010,6 +1011,11 @@ lookup_op(struct msm_vm_bind_job *job, const struct drm_msm_vm_bind_op *op) break; } + if ((op->op == MSM_VM_BIND_OP_MAP_NULL) && + !adreno_smmu_has_prr(priv->gpu)) { + ret = UERR(EINVAL, dev, "PRR not supported\n"); + } + return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index a597f2bee30b..2894fc118485 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -299,6 +299,17 @@ static inline struct msm_gpu *dev_to_gpu(struct device *dev) return container_of(adreno_smmu, struct msm_gpu, adreno_smmu); } +static inline bool +adreno_smmu_has_prr(struct msm_gpu *gpu) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); + + if (!adreno_smmu) + return false; + + return adreno_smmu && adreno_smmu->set_prr_addr; +} + /* It turns out that all targets use the same ringbuffer size */ #define MSM_GPU_RINGBUFFER_SZ SZ_32K #define MSM_GPU_RINGBUFFER_BLKSIZE 32 From 00d5f09719aa6c37545be5c05d25a1eaf8f3da7e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 22 Oct 2025 15:20:38 -0700 Subject: [PATCH 0408/1075] drm/msm: Ensure vm is created in VM_BIND ioctl Since the vm is lazily created, to allow userspace to opt-in to a VM_BIND context, we can't assume it is already created. Fixes: 2e6a8a1fe2b2 ("drm/msm: Add VM_BIND ioctl") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/682939/ Message-ID: <20251022222039.9937-1-robin.clark@oss.qualcomm.com> --- drivers/gpu/drm/msm/msm_gem_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 0d219454f0e6..c112a46a5484 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -1407,7 +1407,7 @@ msm_ioctl_vm_bind(struct drm_device *dev, void *data, struct drm_file *file) * Maybe we could allow just UNMAP ops? OTOH userspace should just * immediately close the device file and all will be torn down. */ - if (to_msm_vm(ctx->vm)->unusable) + if (to_msm_vm(msm_context_vm(dev, ctx))->unusable) return UERR(EPIPE, dev, "context is unusable"); /* From 5ff90d427ef841fa48608d0c19a81c48d6126d46 Mon Sep 17 00:00:00 2001 From: James Le Cuirot Date: Thu, 16 Oct 2025 10:14:17 +0100 Subject: [PATCH 0409/1075] kbuild: install-extmod-build: Fix when given dir outside the build dir Commit b5e395653546 ("kbuild: install-extmod-build: Fix build when specifying KBUILD_OUTPUT") tried to address the "build" variable expecting a relative path by using `realpath --relative-base=.`, but this only works when the given directory is below the current directory. `realpath --relative-to=.` will return a relative path in all cases. Fixes: b5e395653546 ("kbuild: install-extmod-build: Fix build when specifying KBUILD_OUTPUT") Signed-off-by: James Le Cuirot Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/20251016091417.9985-1-chewi@gentoo.org Signed-off-by: Nathan Chancellor --- scripts/package/install-extmod-build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index b96538787f3d..054fdf45cc37 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -63,7 +63,7 @@ if [ "${CC}" != "${HOSTCC}" ]; then # Clear VPATH and srcroot because the source files reside in the output # directory. # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-base=. "${destdir}")"/scripts + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"$(realpath --relative-to=. "${destdir}")"/scripts rm -f "${destdir}/scripts/Kbuild" fi From 14e02ed3876f4ab0ed6d3f41972175f8b8df3d70 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 17 Oct 2025 11:13:36 +0200 Subject: [PATCH 0410/1075] drm/sysfb: Do not dereference NULL pointer in plane reset The plane state in __drm_gem_reset_shadow_plane() can be NULL. Do not deref that pointer, but forward NULL to the other plane-reset helpers. Clears plane->state to NULL. v2: - fix typo in commit description (Javier) Signed-off-by: Thomas Zimmermann Fixes: b71565022031 ("drm/gem: Export implementation of shadow-plane helpers") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/dri-devel/aPIDAsHIUHp_qSW4@stanley.mountain/ Cc: Thomas Zimmermann Cc: Melissa Wen Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Simona Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v5.15+ Reviewed-by: Javier Martinez Canillas Link: https://patch.msgid.link/20251017091407.58488-1-tzimmermann@suse.de --- drivers/gpu/drm/drm_gem_atomic_helper.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c index ebf305fb24f0..6fb55601252f 100644 --- a/drivers/gpu/drm/drm_gem_atomic_helper.c +++ b/drivers/gpu/drm/drm_gem_atomic_helper.c @@ -310,8 +310,12 @@ EXPORT_SYMBOL(drm_gem_destroy_shadow_plane_state); void __drm_gem_reset_shadow_plane(struct drm_plane *plane, struct drm_shadow_plane_state *shadow_plane_state) { - __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); - drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); + if (shadow_plane_state) { + __drm_atomic_helper_plane_reset(plane, &shadow_plane_state->base); + drm_format_conv_state_init(&shadow_plane_state->fmtcnv_state); + } else { + __drm_atomic_helper_plane_reset(plane, NULL); + } } EXPORT_SYMBOL(__drm_gem_reset_shadow_plane); From dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Oct 2025 15:59:49 -0700 Subject: [PATCH 0411/1075] Linux 6.18-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d14824792227..b34a1f4c0396 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From f765fdfcd8b5bce92c6aa1a517ff549529ddf590 Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 24 Oct 2025 21:17:01 -0500 Subject: [PATCH 0412/1075] cifs: fix typo in enable_gcm_256 module parameter Fix typo in description of enable_gcm_256 module parameter Suggested-by: Thomas Spear Signed-off-by: Steve French --- fs/smb/client/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 4f959f1e08d2..185ac41bd7e9 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -173,7 +173,7 @@ module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); module_param(enable_gcm_256, bool, 0644); -MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0"); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/1"); module_param(require_gcm_256, bool, 0644); MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0"); From 6f40e50ceb99fc8ef37e5c56e2ec1d162733fef0 Mon Sep 17 00:00:00 2001 From: Qianchang Zhao Date: Wed, 22 Oct 2025 15:27:47 +0900 Subject: [PATCH 0413/1075] ksmbd: transport_ipc: validate payload size before reading handle handle_response() dereferences the payload as a 4-byte handle without verifying that the declared payload size is at least 4 bytes. A malformed or truncated message from ksmbd.mountd can lead to a 4-byte read past the declared payload size. Validate the size before dereferencing. This is a minimal fix to guard the initial handle read. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org Reported-by: Qianchang Zhao Signed-off-by: Qianchang Zhao Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_ipc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 46f87fd1ce1c..2c08cccfa680 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -263,10 +263,16 @@ static void ipc_msg_handle_free(int handle) static int handle_response(int type, void *payload, size_t sz) { - unsigned int handle = *(unsigned int *)payload; + unsigned int handle; struct ipc_msg_table_entry *entry; int ret = 0; + /* Prevent 4-byte read beyond declared payload size */ + if (sz < sizeof(unsigned int)) + return -EINVAL; + + handle = *(unsigned int *)payload; + ipc_update_last_active(); down_read(&ipc_msg_table_lock); hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) { From 975f05a7647720b6a82dac73463eaeca3067de71 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 16:07:13 +0200 Subject: [PATCH 0414/1075] smb: server: call smb_direct_post_recv_credits() when the negotiation is done We now activate sc->recv_io.posted.refill_work and sc->idle.immediate_work only after a successful negotiation, before sending the negotiation response. It means the queue_work(sc->workqueue, &sc->recv_io.posted.refill_work) in put_recvmsg() of the negotiate request, is a no-op now. It also means our explicit smb_direct_post_recv_credits() will have queue_work(sc->workqueue, &sc->idle.immediate_work) as no-op. This should make sure we don't have races and post any immediate data_transfer message that tries to grant credits to the peer, before we send the negotiation response, as that will grant the initial credits to the peer. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Fixes: 1cde0a74a7a8 ("smb: server: don't use delayed_work for post_recv_credits_work") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 36 ++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 89b02efdba0c..e70fc447e815 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -418,9 +418,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sc->ib.dev = sc->rdma.cm_id->device; - INIT_WORK(&sc->recv_io.posted.refill_work, - smb_direct_post_recv_credits); - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); conn = ksmbd_conn_alloc(); @@ -1904,7 +1901,6 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) goto out_err; } - smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); return 0; out_err: put_recvmsg(sc, recvmsg); @@ -2249,8 +2245,8 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return -ECONNABORTED; ret = smb_direct_check_recvmsg(recvmsg); - if (ret == -ECONNABORTED) - goto out; + if (ret) + goto put; req = (struct smbdirect_negotiate_req *)recvmsg->packet; sp->max_recv_size = min_t(int, sp->max_recv_size, @@ -2265,14 +2261,38 @@ static int smb_direct_prepare(struct ksmbd_transport *t) sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); - ret = smb_direct_send_negotiate_response(sc, ret); -out: +put: spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.queue_length--; list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); put_recvmsg(sc, recvmsg); + if (ret == -ECONNABORTED) + return ret; + + if (ret) + goto respond; + + /* + * We negotiated with success, so we need to refill the recv queue. + * We do that with sc->idle.immediate_work still being disabled + * via smbdirect_socket_init(), so that queue_work(sc->workqueue, + * &sc->idle.immediate_work) in smb_direct_post_recv_credits() + * is a no-op. + * + * The message that grants the credits to the client is + * the negotiate response. + */ + INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits); + smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); + if (unlikely(sc->first_error)) + return sc->first_error; + INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + +respond: + ret = smb_direct_send_negotiate_response(sc, ret); + return ret; } From f574069c5c55ebe642f899a01c8f127d845fd562 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 16:07:53 +0200 Subject: [PATCH 0415/1075] smb: server: let smb_direct_cm_handler() call ib_drain_qp() after smb_direct_disconnect_rdma_work() All handlers triggered by ib_drain_qp() should already see the broken connection. smb_direct_cm_handler() is called under a mutex of the rdma_cm, we should make sure ib_drain_qp() and all rdma layer logic completes and unlocks the mutex. It means free_transport() will also already see the connection as SMBDIRECT_SOCKET_DISCONNECTED, so we need to call crdma_[un]lock_handler(sc->rdma.cm_id) around ib_drain_qp(), rdma_destroy_qp(), ib_free_cq() and ib_dealloc_pd(). Otherwise we free resources while the ib_drain_qp() within smb_direct_cm_handler() is still running. We have to unlock before rdma_destroy_id() as it locks again. Fixes: 141fa9824c0f ("ksmbd: call ib_drain_qp when disconnected") Fixes: 4c564f03e23b ("smb: server: make use of common smbdirect_socket") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index e70fc447e815..7d86553fcc7c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -466,6 +466,9 @@ static void free_transport(struct smb_direct_transport *t) disable_delayed_work_sync(&sc->idle.timer_work); disable_work_sync(&sc->idle.immediate_work); + if (sc->rdma.cm_id) + rdma_lock_handler(sc->rdma.cm_id); + if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); sc->ib.qp = NULL; @@ -494,8 +497,10 @@ static void free_transport(struct smb_direct_transport *t) ib_free_cq(sc->ib.recv_cq); if (sc->ib.pd) ib_dealloc_pd(sc->ib.pd); - if (sc->rdma.cm_id) + if (sc->rdma.cm_id) { + rdma_unlock_handler(sc->rdma.cm_id); rdma_destroy_id(sc->rdma.cm_id); + } smb_direct_destroy_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); @@ -1724,10 +1729,10 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, } case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: { - ib_drain_qp(sc->ib.qp); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; smb_direct_disconnect_rdma_work(&sc->disconnect_work); + if (sc->ib.qp) + ib_drain_qp(sc->ib.qp); break; } case RDMA_CM_EVENT_CONNECT_ERROR: { From 75cdae446ddffe0a6a991bbb146dee51d9d4c865 Mon Sep 17 00:00:00 2001 From: Roy Vegard Ovesen Date: Mon, 20 Oct 2025 22:15:08 +0200 Subject: [PATCH 0416/1075] ALSA: usb-audio: don't log messages meant for 1810c when initializing 1824c The log messages for the PreSonus STUDIO 1810c about device_setup are not applicable to the 1824c, and should not be logged when 1824c initializes. Refactor from if statement to switch statement as there might be more STUDIO series devices added later. Fixes: 080564558eb1 ("ALSA: usb-audio: enable support for Presonus Studio 1824c within 1810c file") Signed-off-by: Roy Vegard Ovesen Link: https://patch.msgid.link/aPaYTP7ceuABf8c7@ark Signed-off-by: Takashi Iwai --- sound/usb/mixer_s1810c.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/sound/usb/mixer_s1810c.c b/sound/usb/mixer_s1810c.c index 89c652434f71..6e09e074c0e7 100644 --- a/sound/usb/mixer_s1810c.c +++ b/sound/usb/mixer_s1810c.c @@ -597,15 +597,6 @@ int snd_sc1810_init_mixer(struct usb_mixer_interface *mixer) if (!list_empty(&chip->mixer_list)) return 0; - dev_info(&dev->dev, - "Presonus Studio 1810c, device_setup: %u\n", chip->setup); - if (chip->setup == 1) - dev_info(&dev->dev, "(8out/18in @ 48kHz)\n"); - else if (chip->setup == 2) - dev_info(&dev->dev, "(6out/8in @ 192kHz)\n"); - else - dev_info(&dev->dev, "(8out/14in @ 96kHz)\n"); - ret = snd_s1810c_init_mixer_maps(chip); if (ret < 0) return ret; @@ -634,16 +625,28 @@ int snd_sc1810_init_mixer(struct usb_mixer_interface *mixer) if (ret < 0) return ret; - // The 1824c has a Mono Main switch instead of a - // A/B select switch. - if (mixer->chip->usb_id == USB_ID(0x194f, 0x010d)) { - ret = snd_s1810c_switch_init(mixer, &snd_s1824c_mono_sw); - if (ret < 0) - return ret; - } else if (mixer->chip->usb_id == USB_ID(0x194f, 0x010c)) { + switch (chip->usb_id) { + case USB_ID(0x194f, 0x010c): /* Presonus Studio 1810c */ + dev_info(&dev->dev, + "Presonus Studio 1810c, device_setup: %u\n", chip->setup); + if (chip->setup == 1) + dev_info(&dev->dev, "(8out/18in @ 48kHz)\n"); + else if (chip->setup == 2) + dev_info(&dev->dev, "(6out/8in @ 192kHz)\n"); + else + dev_info(&dev->dev, "(8out/14in @ 96kHz)\n"); + ret = snd_s1810c_switch_init(mixer, &snd_s1810c_ab_sw); if (ret < 0) return ret; + + break; + case USB_ID(0x194f, 0x010d): /* Presonus Studio 1824c */ + ret = snd_s1810c_switch_init(mixer, &snd_s1824c_mono_sw); + if (ret < 0) + return ret; + + break; } return ret; From c4b67b514af8c2d73c64b36e0cd99e9b26b9ac82 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 16 Oct 2025 19:40:48 +0800 Subject: [PATCH 0417/1075] RDMA/hns: Fix recv CQ and QP cache affinity Currently driver enforces affinity between QP cache and send CQ cache, which helps improve the performance of sending, but doesn't set affinity with recv CQ cache, resulting in suboptimal performance of receiving. Use one CQ bank per context to ensure the affinity among QP, send CQ and recv CQ. For kernel ULP, CQ bank is fixed to 0. Fixes: 9e03dbea2b06 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-2-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_cq.c | 58 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 4 ++ 3 files changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 3a5c93c9fb3e..6aa82fe9dd3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include #include #include #include "hns_roce_device.h" @@ -37,6 +38,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +93,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; } -static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, + struct hns_roce_bank *bank, struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +115,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id; mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid]; id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -396,7 +448,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } - ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 78ee04a48a74..06832c0ac055 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -217,6 +217,7 @@ struct hns_roce_ucontext { struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; u32 config; + u8 cq_bank_id; }; struct hns_roce_pd { @@ -495,6 +496,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; }; struct hns_roce_srq_table { @@ -1305,5 +1307,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, enum hns_roce_mmap_type mmap_type); bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d50f36f8a110..f3607fe107a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -425,6 +425,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, if (ret) goto error_fail_copy_to_udata; + hns_roce_get_cq_bankid_for_uctx(context); + return 0; error_fail_copy_to_udata: @@ -447,6 +449,8 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device); + hns_roce_put_cq_bankid_for_uctx(context); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex); From f5a7cbea5411668d429eb4ffe96c4063fe8dac9e Mon Sep 17 00:00:00 2001 From: wenglianfa Date: Thu, 16 Oct 2025 19:40:49 +0800 Subject: [PATCH 0418/1075] RDMA/hns: Fix the modification of max_send_sge The actual sge number may exceed the value specified in init_attr->cap when HW needs extra sge to enable inline feature. Since these extra sges are not expected by ULP, return the user-specified value to ULP instead of the expanded sge number. Fixes: 0c5e259b06a8 ("RDMA/hns: Fix incorrect sge nums calculation") Signed-off-by: wenglianfa Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-3-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6ff1b8ce580c..bdd879ac12dd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -662,7 +662,6 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -744,7 +743,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, /* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; - cap->max_send_sge = hr_qp->sq.max_gs; return 0; } From fe9622011f955e35ba84d3af7b2f2fed31cf8ca1 Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Thu, 16 Oct 2025 19:40:50 +0800 Subject: [PATCH 0419/1075] RDMA/hns: Fix wrong WQE data when QP wraps around When QP wraps around, WQE data from the previous use at the same position still remains as driver does not clear it. The WQE field layout differs across different opcodes, causing that the fields that are not explicitly assigned for the current opcode retain stale values, and are issued to HW by mistake. Such fields are as follows: * MSG_START_SGE_IDX field in ATOMIC WQE * BLOCK_SIZE and ZBVA fields in FRMR WQE * DirectWQE fields when DirectWQE not used For ATOMIC WQE, always set the latest sge index in MSG_START_SGE_IDX as required by HW. For FRMR WQE and DirectWQE, clear only those unassigned fields instead of the entire WQE to avoid performance penalty. Fixes: 68a997c5d28c ("RDMA/hns: Add FRMR support for hip08") Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-4-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f82bdd46a917..ab378525b296 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -165,6 +165,8 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, hr_reg_write(fseg, FRMR_PBL_BUF_PG_SZ, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); hr_reg_clear(fseg, FRMR_BLK_MODE); + hr_reg_clear(fseg, FRMR_BLOCK_SIZE); + hr_reg_clear(fseg, FRMR_ZBVA); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -339,9 +341,6 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, int j = 0; int i; - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE, !!(wr->send_flags & IB_SEND_INLINE)); if (wr->send_flags & IB_SEND_INLINE) @@ -586,6 +585,9 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); + hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX, + curr_idx & (qp->sge.sge_cnt - 1)); + if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { if (msg_len != ATOMIC_WR_LEN) @@ -734,6 +736,9 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + /* RC and UD share the same DirectWQE field layout */ + ((struct hns_roce_v2_rc_send_wqe *)wqe)->byte_4 = 0; + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_RC) ret = set_rc_wqe(qp, wr, wqe, &sge_idx, owner_bit); From b8c9aab4c738e5e9814915768ac6c184fe36ab93 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 16 Oct 2025 19:40:51 +0800 Subject: [PATCH 0420/1075] RDMA/hns: Remove an extra blank line Remove an extra blank line. Signed-off-by: Guofeng Yue Signed-off-by: Junxian Huang Link: https://patch.msgid.link/20251016114051.1963197-5-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ab378525b296..63052c0e7613 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7053,7 +7053,6 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - handle->priv = hr_dev; return 0; From d914ec6f07548f7c13a231a4f526e043e736e82e Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Mon, 27 Oct 2025 18:33:33 +0800 Subject: [PATCH 0421/1075] ASoC: rt721: fix prepare clock stop failed This patch adds settings to prevent the 'prepare clock stop failed' error. Signed-off-by: Shuming Fan Link: https://patch.msgid.link/20251027103333.38353-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt721-sdca.c | 4 ++++ sound/soc/codecs/rt721-sdca.h | 1 + 2 files changed, 5 insertions(+) diff --git a/sound/soc/codecs/rt721-sdca.c b/sound/soc/codecs/rt721-sdca.c index a4bd29d7220b..5f7b505d5414 100644 --- a/sound/soc/codecs/rt721-sdca.c +++ b/sound/soc/codecs/rt721-sdca.c @@ -281,6 +281,10 @@ static void rt721_sdca_jack_preset(struct rt721_sdca_priv *rt721) rt_sdca_index_write(rt721->mbq_regmap, RT721_BOOST_CTRL, RT721_BST_4CH_TOP_GATING_CTRL1, 0x002a); regmap_write(rt721->regmap, 0x2f58, 0x07); + + regmap_write(rt721->regmap, 0x2f51, 0x00); + rt_sdca_index_write(rt721->mbq_regmap, RT721_HDA_SDCA_FLOAT, + RT721_MISC_CTL, 0x0004); } static void rt721_sdca_jack_init(struct rt721_sdca_priv *rt721) diff --git a/sound/soc/codecs/rt721-sdca.h b/sound/soc/codecs/rt721-sdca.h index 71fac9cd8739..24ce188562ba 100644 --- a/sound/soc/codecs/rt721-sdca.h +++ b/sound/soc/codecs/rt721-sdca.h @@ -137,6 +137,7 @@ struct rt721_sdca_dmic_kctrl_priv { #define RT721_HDA_LEGACY_UAJ_CTL 0x02 #define RT721_HDA_LEGACY_CTL1 0x05 #define RT721_HDA_LEGACY_RESET_CTL 0x06 +#define RT721_MISC_CTL 0x07 #define RT721_XU_REL_CTRL 0x0c #define RT721_GE_REL_CTRL1 0x0d #define RT721_HDA_LEGACY_GPIO_WAKE_EN_CTL 0x0e From d25e3a610bae03bffc5c14b5d944a5d0cd844678 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Wed, 22 Oct 2025 08:34:03 +0200 Subject: [PATCH 0422/1075] drm/sched: Fix race in drm_sched_entity_select_rq() In a past bug fix it was forgotten that entity access must be protected by the entity lock. That's a data race and potentially UB. Move the spin_unlock() to the appropriate position. Cc: stable@vger.kernel.org # v5.13+ Fixes: ac4eb83ab255 ("drm/sched: select new rq even if there is only one v3") Reviewed-by: Tvrtko Ursulin Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251022063402.87318-2-phasta@kernel.org --- drivers/gpu/drm/scheduler/sched_entity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 5a4697f636f2..aa222166de58 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -552,10 +552,11 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity) drm_sched_rq_remove_entity(entity->rq, entity); entity->rq = rq; } - spin_unlock(&entity->lock); if (entity->num_sched_list == 1) entity->sched_list = NULL; + + spin_unlock(&entity->lock); } /** From e0023c8a74028739643aa14bd201c41a99866ca4 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 24 Oct 2025 18:12:22 +0200 Subject: [PATCH 0423/1075] drm/nouveau: Fix race in nouveau_sched_fini() nouveau_sched_fini() uses a memory barrier before wait_event(). wait_event(), however, is a macro which expands to a loop which might check the passed condition several times. The barrier would only take effect for the first check. Replace the barrier with a function which takes the spinlock. Cc: stable@vger.kernel.org # v6.8+ Fixes: 5f03a507b29e ("drm/nouveau: implement 1:1 scheduler - entity relationship") Acked-by: Danilo Krummrich Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251024161221.196155-2-phasta@kernel.org --- drivers/gpu/drm/nouveau/nouveau_sched.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index e60f7892f5ce..a7bf539e5d86 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -482,6 +482,17 @@ nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, return 0; } +static bool +nouveau_sched_job_list_empty(struct nouveau_sched *sched) +{ + bool empty; + + spin_lock(&sched->job.list.lock); + empty = list_empty(&sched->job.list.head); + spin_unlock(&sched->job.list.lock); + + return empty; +} static void nouveau_sched_fini(struct nouveau_sched *sched) @@ -489,8 +500,7 @@ nouveau_sched_fini(struct nouveau_sched *sched) struct drm_gpu_scheduler *drm_sched = &sched->base; struct drm_sched_entity *entity = &sched->entity; - rmb(); /* for list_empty to work without lock */ - wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + wait_event(sched->job.wq, nouveau_sched_job_list_empty(sched)); drm_sched_entity_fini(entity); drm_sched_fini(drm_sched); From db86f55bf81a3a297be05ee8775ae9a8c6e3a599 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Oct 2025 19:12:57 +0200 Subject: [PATCH 0424/1075] cpuidle: governors: menu: Select polling state in some more cases A throughput regression of 11% introduced by commit 779b1a1cb13a ("cpuidle: governors: menu: Avoid selecting states with too much latency") has been reported and it is related to the case when the menu governor checks if selecting a proper idle state instead of a polling one makes sense. In particular, it is questionable to do so if the exit latency of the idle state in question exceeds the predicted idle duration, so add a check for that, which is sufficient to make the reported regression go away, and update the related code comment accordingly. Fixes: 779b1a1cb13a ("cpuidle: governors: menu: Avoid selecting states with too much latency") Closes: https://lore.kernel.org/linux-pm/004501dc43c9$ec8aa930$c59ffb90$@telus.net/ Reported-by: Doug Smythies Tested-by: Doug Smythies Cc: All applicable Signed-off-by: Rafael J. Wysocki Reviewed-by: Christian Loehle Link: https://patch.msgid.link/12786727.O9o76ZdvQC@rafael.j.wysocki --- drivers/cpuidle/governors/menu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 7d21fb5a72f4..23239b0c04f9 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -318,10 +318,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, /* * Use a physical idle state, not busy polling, unless a timer - * is going to trigger soon enough. + * is going to trigger soon enough or the exit latency of the + * idle state in question is greater than the predicted idle + * duration. */ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) && - s->target_residency_ns <= data->next_timer_ns) { + s->target_residency_ns <= data->next_timer_ns && + s->exit_latency_ns <= predicted_ns) { predicted_ns = s->target_residency_ns; idx = i; break; From 28935ee5e4789ad86c08ba9f2426edd6203d13fa Mon Sep 17 00:00:00 2001 From: Eren Demir Date: Mon, 27 Oct 2025 13:58:10 +0300 Subject: [PATCH 0425/1075] ALSA: hda/realtek: Fix mute led for HP Victus 15-fa1xxx (MB 8C2D) The quirk for Victus 15-fa1xxx wasn't working on Victus 15-fa1031nt due to a different board id. This patch enables the existing quirk for the board id 8BC8. Tested on HP Victus 15-fa1031nt (MB 8C2D). The LED behaviour works as intended. Signed-off-by: Eren Demir Link: https://patch.msgid.link/20251027110208.6481-1-eren.demir2479090@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 517e2cd6ad35..e448c0c21b57 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6578,6 +6578,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8c16, "HP Spectre x360 2-in-1 Laptop 16-aa0xxx", ALC245_FIXUP_HP_SPECTRE_X360_16_AA0XXX), SND_PCI_QUIRK(0x103c, 0x8c17, "HP Spectre 16", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8c21, "HP Pavilion Plus Laptop 14-ey0XXX", ALC245_FIXUP_HP_X360_MUTE_LEDS), + SND_PCI_QUIRK(0x103c, 0x8c2d, "HP Victus 15-fa1xxx (MB 8C2D)", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c30, "HP Victus 15-fb1xxx", ALC245_FIXUP_HP_MUTE_LED_COEFBIT), SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), From 2469bb6a6af944755a7d7daf66be90f3b8decbf9 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 27 Oct 2025 09:49:12 +0800 Subject: [PATCH 0426/1075] Revert "wifi: ath10k: avoid unnecessary wait for service ready message" This reverts commit 51a73f1b2e56b0324b4a3bb8cebc4221b5be4c7a. Although this commit benefits QCA6174, it breaks QCA988x and QCA9984 [1][2]. Since it is not likely to root cause/fix this issue in a short time, revert it to get those chips back. Compile tested only. Fixes: 51a73f1b2e56 ("wifi: ath10k: avoid unnecessary wait for service ready message") Link: https://lore.kernel.org/ath10k/6d41bc00602c33ffbf68781f563ff2e6c6915a3e.camel@gmail.com # [1] Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220671 # [2] Signed-off-by: Baochen Qiang Reviewed-by: Vasanthakumar Thiagarajan Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251027-ath10k-revert-polling-first-change-v1-1-89aaf3bcbfa1@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 39 ++++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index b3b00d324075..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1764,32 +1764,33 @@ void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch, int ath10k_wmi_wait_for_service_ready(struct ath10k *ar) { - unsigned long timeout = jiffies + WMI_SERVICE_READY_TIMEOUT_HZ; unsigned long time_left, i; - /* Sometimes the PCI HIF doesn't receive interrupt - * for the service ready message even if the buffer - * was completed. PCIe sniffer shows that it's - * because the corresponding CE ring doesn't fires - * it. Workaround here by polling CE rings. Since - * the message could arrive at any time, continue - * polling until timeout. - */ - do { + time_left = wait_for_completion_timeout(&ar->wmi.service_ready, + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + /* Sometimes the PCI HIF doesn't receive interrupt + * for the service ready message even if the buffer + * was completed. PCIe sniffer shows that it's + * because the corresponding CE ring doesn't fires + * it. Workaround here by polling CE rings once. + */ + ath10k_warn(ar, "failed to receive service ready completion, polling..\n"); + for (i = 0; i < CE_COUNT; i++) ath10k_hif_send_complete_check(ar, i, 1); - /* The 100 ms granularity is a tradeoff considering scheduler - * overhead and response latency - */ time_left = wait_for_completion_timeout(&ar->wmi.service_ready, - msecs_to_jiffies(100)); - if (time_left) - return 0; - } while (time_before(jiffies, timeout)); + WMI_SERVICE_READY_TIMEOUT_HZ); + if (!time_left) { + ath10k_warn(ar, "polling timed out\n"); + return -ETIMEDOUT; + } - ath10k_warn(ar, "failed to receive service ready completion\n"); - return -ETIMEDOUT; + ath10k_warn(ar, "service ready completion received, continuing normally\n"); + } + + return 0; } int ath10k_wmi_wait_for_unified_ready(struct ath10k *ar) From 8a9fb5129e8e64d24543ebc70de941a2d77a9e77 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Thu, 23 Oct 2025 14:46:29 +0200 Subject: [PATCH 0427/1075] x86/microcode/AMD: Limit Entrysign signature checking to known generations Limit Entrysign sha256 signature checking to CPUs in the range Zen1-Zen5. X86_BUG cannot be used here because the loading on the BSP happens way too early, before the cpufeatures machinery has been set up. Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/all/20251023124629.5385-1-bp@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 28ed8c089024..b7c797dc94f4 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -233,13 +233,31 @@ static bool need_sha_check(u32 cur_rev) return true; } +static bool cpu_has_entrysign(void) +{ + unsigned int fam = x86_family(bsp_cpuid_1_eax); + unsigned int model = x86_model(bsp_cpuid_1_eax); + + if (fam == 0x17 || fam == 0x19) + return true; + + if (fam == 0x1a) { + if (model <= 0x2f || + (0x40 <= model && model <= 0x4f) || + (0x60 <= model && model <= 0x6f)) + return true; + } + + return false; +} + static bool verify_sha256_digest(u32 patch_id, u32 cur_rev, const u8 *data, unsigned int len) { struct patch_digest *pd = NULL; u8 digest[SHA256_DIGEST_SIZE]; int i; - if (x86_family(bsp_cpuid_1_eax) < 0x17) + if (!cpu_has_entrysign()) return true; if (!need_sha_check(cur_rev)) From 8080c67dd57cb968150b668ecbd4a4e4afd56ad4 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 24 Oct 2025 19:01:00 +0000 Subject: [PATCH 0428/1075] kunit: prevent log overwrite in param_tests When running parameterized tests, each test case is initialized with kunit_init_test(). This function takes the test_case->log as a parameter but it clears it via string_stream_clear() on each iteration. This results in only the log from the last parameter being preserved in the test_case->log and the results from the previous parameters are lost from the debugfs entry. Fix this by manually setting the param_test.log to the test_case->log after it has been initialized. This prevents kunit_init_test() from clearing the log on each iteration. Link: https://lore.kernel.org/r/20251024190101.2091549-1-cmllamas@google.com Fixes: 4b59300ba4d2 ("kunit: Add parent kunit for parameterized test context") Signed-off-by: Carlos Llamas Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index bb66ea1a3eac..62eb529824c6 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -745,7 +745,8 @@ int kunit_run_tests(struct kunit_suite *suite) .param_index = ++test.param_index, .parent = &test, }; - kunit_init_test(¶m_test, test_case->name, test_case->log); + kunit_init_test(¶m_test, test_case->name, NULL); + param_test.log = test_case->log; kunit_run_case_catch_errors(suite, test_case, ¶m_test); if (param_desc[0] == '\0') { From 060ea84a484e852b52b938f234bf9b5503a6c910 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Mon, 27 Oct 2025 11:40:43 -0600 Subject: [PATCH 0429/1075] riscv: stacktrace: Disable KASAN checks for non-current tasks Unwinding the stack of a task other than current, KASAN would report "BUG: KASAN: out-of-bounds in walk_stackframe+0x41c/0x460" There is a same issue on x86 and has been resolved by the commit 84936118bdf3 ("x86/unwind: Disable KASAN checks for non-current tasks") The solution could be applied to RISC-V too. This patch also can solve the issue: https://seclists.org/oss-sec/2025/q4/23 Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") Co-developed-by: Jiakai Xu Signed-off-by: Jiakai Xu Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20251022072608.743484-1-zhangchunyan@iscas.ac.cn [pjw@kernel.org: clean up checkpatch issues] Signed-off-by: Paul Walmsley --- arch/riscv/kernel/stacktrace.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 3fe9e6edef8f..b41b6255751c 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,22 @@ #ifdef CONFIG_FRAME_POINTER +/* + * This disables KASAN checking when reading a value from another task's stack, + * since the other task could be running on another CPU and could have poisoned + * the stack in the meantime. + */ +#define READ_ONCE_TASK_STACK(task, x) \ +({ \ + unsigned long val; \ + unsigned long addr = x; \ + if ((task) == current) \ + val = READ_ONCE(addr); \ + else \ + val = READ_ONCE_NOCHECK(addr); \ + val; \ +}) + extern asmlinkage void handle_exception(void); extern unsigned long ret_from_exception_end; @@ -69,8 +85,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, fp = frame->ra; pc = regs->ra; } else { - fp = frame->fp; - pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, + fp = READ_ONCE_TASK_STACK(task, frame->fp); + pc = READ_ONCE_TASK_STACK(task, frame->ra); + pc = ftrace_graph_ret_addr(current, &graph_idx, pc, &frame->ra); if (pc >= (unsigned long)handle_exception && pc < (unsigned long)&ret_from_exception_end) { From a74f038fa50e0d33b740f44f862fe856f16de6a8 Mon Sep 17 00:00:00 2001 From: Josephine Pfeiffer Date: Mon, 27 Oct 2025 11:40:43 -0600 Subject: [PATCH 0430/1075] riscv: ptdump: use seq_puts() in pt_dump_seq_puts() macro The pt_dump_seq_puts() macro incorrectly uses seq_printf() instead of seq_puts(). This is both a performance issue and conceptually wrong, as the macro name suggests plain string output (puts) but the implementation uses formatted output (printf). The macro is used in ptdump.c:301 to output a newline character. Using seq_printf() adds unnecessary overhead for format string parsing when outputting this constant string. This bug was introduced in commit 59c4da8640cc ("riscv: Add support to dump the kernel page tables") in 2020, which copied the implementation pattern from other architectures that had the same bug. Fixes: 59c4da8640cc ("riscv: Add support to dump the kernel page tables") Signed-off-by: Josephine Pfeiffer Link: https://lore.kernel.org/r/20251018170451.3355496-1-hi@josie.lol Signed-off-by: Paul Walmsley --- arch/riscv/mm/ptdump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 3b51690cc876..34299c2b231f 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -21,7 +21,7 @@ #define pt_dump_seq_puts(m, fmt) \ ({ \ if (m) \ - seq_printf(m, fmt); \ + seq_puts(m, fmt); \ }) /* From c42458fcf54b3d0bc2ac06667c98dceb43831889 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 11:40:44 -0600 Subject: [PATCH 0431/1075] riscv: Fix memory leak in module_frob_arch_sections() The current code directly overwrites the scratch pointer with the return value of kvrealloc(). If kvrealloc() fails and returns NULL, the original buffer becomes unreachable, causing a memory leak. Fix this by using a temporary variable to store kvrealloc()'s return value and only update the scratch pointer on success. Found via static anlaysis and this is similar to commit 42378a9ca553 ("bpf, verifier: Fix memory leak in array reallocation for stack state") Fixes: be17c0df6795 ("riscv: module: Optimize PLT/GOT entry counting") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Link: https://lore.kernel.org/r/20251026091912.39727-1-linmq006@gmail.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/module-sections.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c index 75551ac6504c..1675cbad8619 100644 --- a/arch/riscv/kernel/module-sections.c +++ b/arch/riscv/kernel/module-sections.c @@ -119,6 +119,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned int num_plts = 0; unsigned int num_gots = 0; Elf_Rela *scratch = NULL; + Elf_Rela *new_scratch; size_t scratch_size = 0; int i; @@ -168,9 +169,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); if (scratch_size_needed > scratch_size) { scratch_size = scratch_size_needed; - scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); - if (!scratch) + new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); + if (!new_scratch) { + kvfree(scratch); return -ENOMEM; + } + scratch = new_scratch; } for (size_t j = 0; j < num_relas; j++) From dc131bcd8d9219f7da533918abcb0d32951b7702 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 22 Oct 2025 13:45:23 -0700 Subject: [PATCH 0432/1075] ACPI: MRRM: Check revision of MRRM table Before trying to parse the MRRM table, check that the table revision is the one that is expected. Fixes: b9020bdb9f76 ("ACPI: MRRM: Minimal parse of ACPI MRRM table") Signed-off-by: Tony Luck Link: https://patch.msgid.link/20251022204523.10752-1-tony.luck@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_mrrm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c index 47ea3ccc2142..a6dbf623e557 100644 --- a/drivers/acpi/acpi_mrrm.c +++ b/drivers/acpi/acpi_mrrm.c @@ -63,6 +63,9 @@ static __init int acpi_parse_mrrm(struct acpi_table_header *table) if (!mrrm) return -ENODEV; + if (mrrm->header.revision != 1) + return -EINVAL; + if (mrrm->flags & ACPI_MRRM_FLAGS_REGION_ASSIGNMENT_OS) return -EOPNOTSUPP; From 543d35004007a06ef247acf2fc55efa8388aa741 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 24 Oct 2025 13:31:25 +0100 Subject: [PATCH 0433/1075] ACPI: SPCR: Check for table version when using precise baudrate Commit 4d330fe54145 ("ACPI: SPCR: Support Precise Baud Rate field") added support to use the precise baud rate available since SPCR 1.09 (revision 4) but failed to check the version of the table provided by the firmware. Accessing an older version of SPCR table causes accesses beyond the end of the table and can lead to garbage data to be used for the baud rate. Check the version of the firmware provided SPCR to ensure that the precise baudrate is vaild before using it. Fixes: 4d330fe54145 ("ACPI: SPCR: Support Precise Baud Rate field") Signed-off-by: Punit Agrawal Link: https://patch.msgid.link/20251024123125.1081612-1-punit.agrawal@oss.qualcomm.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/spcr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c index d4d52d5e9016..73cb933fdc89 100644 --- a/drivers/acpi/spcr.c +++ b/drivers/acpi/spcr.c @@ -155,7 +155,7 @@ int __init acpi_parse_spcr(bool enable_earlycon, bool enable_console) * Baud Rate field. If this field is zero or not present, Configured * Baud Rate is used. */ - if (table->precise_baudrate) + if (table->header.revision >= 4 && table->precise_baudrate) baud_rate = table->precise_baudrate; else switch (table->baud_rate) { case 0: From a042beac6e6f8ac1e923784cfff98b47cbabb185 Mon Sep 17 00:00:00 2001 From: Tomeu Vizoso Date: Tue, 21 Oct 2025 11:37:23 +0200 Subject: [PATCH 0434/1075] drm/etnaviv: fix flush sequence logic The current logic uses the flush sequence from the current address space. This is harmless when deducing the flush requirements for the current submit, as either the incoming address space is the same one as the currently active one or we switch context, in which case the flush is unconditional. However, this sequence is also stored as the current flush sequence of the GPU. If we switch context the stored flush sequence will no longer belong to the currently active address space. This incoherency can then cause missed flushes, resulting in translation errors. Fixes: 27b67278e007 ("drm/etnaviv: rework MMU handling") Signed-off-by: Tomeu Vizoso Signed-off-by: Lucas Stach Reviewed-by: Christian Gmeiner Link: https://lore.kernel.org/r/20251021093723.3887980-1-l.stach@pengutronix.de --- drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index b13a17276d07..88385dc3b30d 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -347,7 +347,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state, u32 link_target, link_dwords; bool switch_context = gpu->exec_state != exec_state; bool switch_mmu_context = gpu->mmu_context != mmu_context; - unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq); + unsigned int new_flush_seq = READ_ONCE(mmu_context->flush_seq); bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq; bool has_blt = !!(gpu->identity.minor_features5 & chipMinorFeatures5_BLT_ENGINE); From d50f21091358b2b29dc06c2061106cdb0f030d03 Mon Sep 17 00:00:00 2001 From: Dimitri John Ledkov Date: Sun, 26 Oct 2025 20:21:00 +0000 Subject: [PATCH 0435/1075] kbuild: align modinfo section for Secureboot Authenticode EDK2 compat Previously linker scripts would always generate vmlinuz that has sections aligned. And thus padded (correct Authenticode calculation) and unpadded calculation would be same. As in https://github.com/rhboot/pesign userspace tool would produce the same authenticode digest for both of the following commands: pesign --padding --hash --in ./arch/x86_64/boot/bzImage pesign --nopadding --hash --in ./arch/x86_64/boot/bzImage The commit 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") added .modinfo section of variable length. Depending on kernel configuration it may or may not be aligned. All userspace signing tooling correctly pads such section to calculation spec compliant authenticode digest. However, if bzImage is not further processed and is attempted to be loaded directly by EDK2 firmware, it calculates unpadded Authenticode digest and fails to correct accept/reject such kernel builds even when propoer Authenticode values are enrolled in db/dbx. One can say EDK2 requires aligned/padded kernels in Secureboot. Thus add ALIGN(8) to the .modinfo section, to esure kernels irrespective of modinfo contents can be loaded by all existing EDK2 firmware builds. Fixes: 3e86e4d74c04 ("kbuild: keep .modinfo section in vmlinux.unstripped") Cc: stable@vger.kernel.org Signed-off-by: Dimitri John Ledkov Link: https://patch.msgid.link/20251026202100.679989-1-dimitri.ledkov@surgut.co.uk Signed-off-by: Nathan Chancellor --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a9a2e732a65..e04d56a5332e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -832,7 +832,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) /* Required sections not related to debugging. */ #define ELF_DETAILS \ - .modinfo : { *(.modinfo) } \ + .modinfo : { *(.modinfo) . = ALIGN(8); } \ .comment 0 : { *(.comment) } \ .symtab 0 : { *(.symtab) } \ .strtab 0 : { *(.strtab) } \ From cf20852500d2895d1db22b69d87281aca2e7b5d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Oct 2025 21:01:29 +0200 Subject: [PATCH 0436/1075] KMSAN: Restore dynamic check for '-fsanitize=kernel-memory' Commit 5ff8c11775c7 ("KMSAN: Remove tautological checks") changed CONFIG_HAVE_KMSAN_COMPILER from a dynamic check for '-fsanitize=kernel-memory' to just being true for CONFIG_CC_IS_CLANG. This missed the fact that not all architectures supported '-fsanitize=kernel-memory' at the same time. For example, SystemZ / s390 gained support for KMSAN in clang-18 [1], so builds with clang-15 through clang-17 can select KMSAN but they error with: clang-16: error: unsupported option '-fsanitize=kernel-memory' for target 's390x-unknown-linux-gnu' Restore the cc-option check for '-fsanitize=kernel-memory' to make sure the compiler target properly supports '-fsanitize=kernel-memory'. The check for '-msan-disable-checks=1' does not need to be restored because all supported clang versions for building the kernel support it. Fixes: 5ff8c11775c7 ("KMSAN: Remove tautological checks") Link: https://github.com/llvm/llvm-project/commit/a3e56a8792ffaf3a3d3538736e1042b8db45ab89 [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202510220236.AVuXXCYy-lkp@intel.com/ Acked-by: Nicolas Schier Link: https://patch.msgid.link/20251023-fix-kmsan-check-s390-clang-v1-1-4e6df477a4cc@kernel.org Signed-off-by: Nathan Chancellor --- lib/Kconfig.kmsan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan index 7251b6b59e69..cae1ddcc18e1 100644 --- a/lib/Kconfig.kmsan +++ b/lib/Kconfig.kmsan @@ -3,7 +3,7 @@ config HAVE_ARCH_KMSAN bool config HAVE_KMSAN_COMPILER - def_bool CC_IS_CLANG + def_bool $(cc-option,-fsanitize=kernel-memory) config KMSAN bool "KMSAN: detector of uninitialized values use" From 65f9c4c5888913c2cf5d2fc9454c83f9930d537d Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 15:24:38 +0200 Subject: [PATCH 0437/1075] tools: ynl: fix string attribute length to include null terminator The ynl_attr_put_str() function was not including the null terminator in the attribute length calculation. This caused kernel to reject CTRL_CMD_GETFAMILY requests with EINVAL: "Attribute failed policy validation". For a 4-character family name like "dpll": - Sent: nla_len=8 (4 byte header + 4 byte string without null) - Expected: nla_len=9 (4 byte header + 5 byte string with null) The bug was introduced in commit 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") when refactoring from stpcpy() to strlen(). The original code correctly included the null terminator: end = stpcpy(ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); Since stpcpy() returns a pointer past the null terminator, the length included it. The refactored version using strlen() omitted the +1. The fix also removes NLA_ALIGN() from nla_len calculation, since nla_len should contain actual attribute length, not aligned length. Alignment is only for calculating next attribute position. This makes the code consistent with ynl_attr_put(). CTRL_ATTR_FAMILY_NAME uses NLA_NUL_STRING policy which requires null terminator. Kernel validates with memchr() and rejects if not found. Fixes: 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") Signed-off-by: Petr Oros Tested-by: Ivan Vecera Reviewed-by: Ivan Vecera Link: https://lore.kernel.org/20251018151737.365485-3-zahari.doychev@linux.com Link: https://patch.msgid.link/20251024132438.351290-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 29481989ea76..ced7dce44efb 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -313,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) struct nlattr *attr; size_t len; - len = strlen(str); + len = strlen(str) + 1; if (__ynl_attr_put_overflow(nlh, len)) return; @@ -321,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) attr->nla_type = attr_type; strcpy((char *)ynl_attr_data(attr), str); - attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); + attr->nla_len = NLA_HDRLEN + len; nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } From 217660325580d51c350726159703b2fcec2b95e3 Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 20 Oct 2025 20:23:55 +0800 Subject: [PATCH 0438/1075] riscv: tests: Rename kprobes_test_riscv to kprobes_riscv According to Documentation/dev-tools/kunit/style.rst a KUnit test suite normally should not have "test" in the name. Rename it to follow the style guide. Signed-off-by: Vivian Wang Tested-by: Inochi Amaoto Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20251020-riscv-kunit-kconfig-fix-6-18-v1-1-d773b5d5ce48@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/kernel/tests/kprobes/test-kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.c b/arch/riscv/kernel/tests/kprobes/test-kprobes.c index 6f6cdfbf5a95..0716c88bcec7 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.c +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.c @@ -49,7 +49,7 @@ static struct kunit_case kprobes_testcases[] = { }; static struct kunit_suite kprobes_test_suite = { - .name = "kprobes_test_riscv", + .name = "kprobes_riscv", .test_cases = kprobes_testcases, }; From 5fada1605733a427feff45147cef9455ce29620f Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 20 Oct 2025 20:23:56 +0800 Subject: [PATCH 0439/1075] riscv: tests: Make RISCV_KPROBES_KUNIT tristate This disallows KUNIT=m and RISCV_KPROBES_KUNIT=y, which produces these relocs_check.sh warnings when RELOCATABLE=y: WARNING: 3 bad relocations ffffffff81e24118 R_RISCV_64 kunit_unary_assert_format ffffffff81e24a60 R_RISCV_64 kunit_binary_assert_format ffffffff81e269d0 R_RISCV_JUMP_SLOT __kunit_do_failed_assertion This fixes allmodconfig build. Reported-by: Inochi Amaoto Fixes: f2fab612824f ("riscv: Add kprobes KUnit test") Signed-off-by: Vivian Wang Tested-by: Inochi Amaoto Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20251020-riscv-kunit-kconfig-fix-6-18-v1-2-d773b5d5ce48@iscas.ac.cn Signed-off-by: Paul Walmsley --- arch/riscv/kernel/tests/Kconfig.debug | 2 +- arch/riscv/kernel/tests/kprobes/Makefile | 4 +++- arch/riscv/kernel/tests/kprobes/test-kprobes.c | 3 +++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/tests/Kconfig.debug b/arch/riscv/kernel/tests/Kconfig.debug index 5db4df44279e..40f8dafffa0a 100644 --- a/arch/riscv/kernel/tests/Kconfig.debug +++ b/arch/riscv/kernel/tests/Kconfig.debug @@ -31,7 +31,7 @@ config RISCV_MODULE_LINKING_KUNIT If unsure, say N. config RISCV_KPROBES_KUNIT - bool "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS + tristate "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS depends on KUNIT depends on KPROBES default KUNIT_ALL_TESTS diff --git a/arch/riscv/kernel/tests/kprobes/Makefile b/arch/riscv/kernel/tests/kprobes/Makefile index 4cb6c66a98e8..df7256f62313 100644 --- a/arch/riscv/kernel/tests/kprobes/Makefile +++ b/arch/riscv/kernel/tests/kprobes/Makefile @@ -1 +1,3 @@ -obj-y += test-kprobes.o test-kprobes-asm.o +obj-$(CONFIG_RISCV_KPROBES_KUNIT) += kprobes_riscv_kunit.o + +kprobes_riscv_kunit-objs := test-kprobes.o test-kprobes-asm.o diff --git a/arch/riscv/kernel/tests/kprobes/test-kprobes.c b/arch/riscv/kernel/tests/kprobes/test-kprobes.c index 0716c88bcec7..664535ca0a98 100644 --- a/arch/riscv/kernel/tests/kprobes/test-kprobes.c +++ b/arch/riscv/kernel/tests/kprobes/test-kprobes.c @@ -54,3 +54,6 @@ static struct kunit_suite kprobes_test_suite = { }; kunit_test_suites(&kprobes_test_suite); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("KUnit test for riscv kprobes"); From e3a0ca09acbe697245f944ee92b956db58a0ed09 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 23 Oct 2025 16:24:06 +0700 Subject: [PATCH 0440/1075] MAINTAINERS: mark ISDN subsystem as orphan We have not heard any activities from Karsten in years: - Last review tag was nine years ago in commit a921e9bd4e22a7 ("isdn: i4l: move active-isdn drivers to staging") - Last message on lore was in October 2020 [1]. Furthermore, messages to isdn mailing list bounce. Mark the subsystem as orphan to reflect these. [1]: https://lore.kernel.org/all/0ee243a9-9937-ad26-0684-44b18e772662@linux-pingi.de/ Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251023092406.56699-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 8 ++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CREDITS b/CREDITS index 903ea238e64f..fa5397f4ebcd 100644 --- a/CREDITS +++ b/CREDITS @@ -2036,6 +2036,10 @@ S: Botanicka' 68a S: 602 00 Brno S: Czech Republic +N: Karsten Keil +E: isdn@linux-pingi.de +D: ISDN subsystem maintainer + N: Jakob Kemi E: jakob.kemi@telia.com D: V4L W9966 Webcam driver diff --git a/MAINTAINERS b/MAINTAINERS index bccada21ef41..cca911b2de4e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13247,10 +13247,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git mast F: drivers/infiniband/ulp/isert ISDN/CMTP OVER BLUETOOTH -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Odd Fixes +S: Orphan W: http://www.isdn4linux.de F: Documentation/isdn/ F: drivers/isdn/capi/ @@ -13259,10 +13257,8 @@ F: include/uapi/linux/isdn/ F: net/bluetooth/cmtp/ ISDN/mISDN SUBSYSTEM -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Maintained +S: Orphan W: http://www.isdn4linux.de F: drivers/isdn/Kconfig F: drivers/isdn/Makefile From 03ca7c8c42be913529eb9f188278114430c6abbd Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 23 Oct 2025 21:13:37 +0800 Subject: [PATCH 0441/1075] net: hns3: return error code when function fails Currently, in hclge_mii_ioctl(), the operation to read the PHY register (SIOCGMIIREG) always returns 0. This patch changes the return type of hclge_read_phy_reg(), returning an error code when the function fails. Fixes: 024712f51e57 ("net: hns3: add ioctl support for imp-controlled PHYs") Signed-off-by: Jijie Shao Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251023131338.2642520-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 9 ++++++--- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9d34d28ff168..782bb48c9f3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9429,8 +9429,7 @@ static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) /* this command reads phy id and register at the same time */ fallthrough; case SIOCGMIIREG: - data->val_out = hclge_read_phy_reg(hdev, data->reg_num); - return 0; + return hclge_read_phy_reg(hdev, data->reg_num, &data->val_out); case SIOCSMIIREG: return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 96553109f44c..cf881108fa57 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -274,7 +274,7 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev) phy_stop(phydev); } -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val) { struct hclge_phy_reg_cmd *req; struct hclge_desc desc; @@ -286,11 +286,14 @@ u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) req->reg_addr = cpu_to_le16(reg_addr); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) + if (ret) { dev_err(&hdev->pdev->dev, "failed to read phy reg, ret = %d.\n", ret); + return ret; + } - return le16_to_cpu(req->reg_val); + *val = le16_to_cpu(req->reg_val); + return 0; } int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 4200d0b6d931..21d434c82475 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -13,7 +13,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle); void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr); +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val); int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val); #endif From 46a499aaf8c27476fd05e800f3e947bfd71aa724 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 23 Oct 2025 19:48:42 +0530 Subject: [PATCH 0442/1075] sfc: fix potential memory leak in efx_mae_process_mport() In efx_mae_enumerate_mports(), memory allocated for mae_mport_desc is passed as a argument to efx_mae_process_mport(), but when the error path in efx_mae_process_mport() gets executed, the memory allocated for desc gets leaked. Fix that by freeing the memory allocation before returning error. Fixes: a6a15aca4207 ("sfc: enumerate mports in ef100") Acked-by: Edward Cree Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251023141844.25847-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 6fd0c1e9a7d5..7cfd9000f79d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1090,6 +1090,9 @@ void efx_mae_remove_mport(void *desc, void *arg) kfree(mport); } +/* + * Takes ownership of @desc, even if it returns an error + */ static int efx_mae_process_mport(struct efx_nic *efx, struct mae_mport_desc *desc) { @@ -1100,6 +1103,7 @@ static int efx_mae_process_mport(struct efx_nic *efx, if (!IS_ERR_OR_NULL(mport)) { netif_err(efx, drv, efx->net_dev, "mport with id %u does exist!!!\n", desc->mport_id); + kfree(desc); return -EEXIST; } From 44aa25c000b41d7afcb030ac1b8a38f06dabef0a Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 24 Oct 2025 18:16:40 +0100 Subject: [PATCH 0443/1075] riscv: asm: use .insn for making custom instructions The assembler has .insn for building custom instructions now, so change the .4byte to .insn. This ensures the output is marked as an instruction and not as data which may confuse both debuggers and anything else that relies on this sort of marking. Add an ASM_INSN_I() wrapper in asm.h to allow the selecting of how this is output so older assemblers are still good. Reviewed-by: Andrew Jones Signed-off-by: Ben Dooks Link: https://lore.kernel.org/r/20251024171640.65232-1-ben.dooks@codethink.co.uk Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/asm.h | 6 ++++++ arch/riscv/include/asm/insn-def.h | 8 ++++---- arch/riscv/include/asm/vendor_extensions/mips.h | 6 +++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index ac28066bb564..e9e8ba83e632 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -12,6 +12,12 @@ #define __ASM_STR(x) #x #endif +#ifdef CONFIG_AS_HAS_INSN +#define ASM_INSN_I(__x) ".insn " __x +#else +#define ASM_INSN_I(__x) ".4byte " __x +#endif + #if __riscv_xlen == 64 #define __REG_SEL(a, b) __ASM_STR(a) #elif __riscv_xlen == 32 diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h index c9cfcea52cbb..d29da6ccd3dd 100644 --- a/arch/riscv/include/asm/insn-def.h +++ b/arch/riscv/include/asm/insn-def.h @@ -256,10 +256,10 @@ INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ SIMM12((offset) & 0xfe0), RS1(base)) -#define RISCV_PAUSE ".4byte 0x100000f" -#define ZAWRS_WRS_NTO ".4byte 0x00d00073" -#define ZAWRS_WRS_STO ".4byte 0x01d00073" -#define RISCV_NOP4 ".4byte 0x00000013" +#define RISCV_PAUSE ASM_INSN_I("0x100000f") +#define ZAWRS_WRS_NTO ASM_INSN_I("0x00d00073") +#define ZAWRS_WRS_STO ASM_INSN_I("0x01d00073") +#define RISCV_NOP4 ASM_INSN_I("0x00000013") #define RISCV_INSN_NOP4 _AC(0x00000013, U) diff --git a/arch/riscv/include/asm/vendor_extensions/mips.h b/arch/riscv/include/asm/vendor_extensions/mips.h index ea8ca747d691..ffeb12dc17a3 100644 --- a/arch/riscv/include/asm/vendor_extensions/mips.h +++ b/arch/riscv/include/asm/vendor_extensions/mips.h @@ -30,8 +30,8 @@ extern struct riscv_isa_vendor_ext_data_list riscv_isa_vendor_ext_list_mips; * allowing any subsequent instructions to fetch. */ -#define MIPS_PAUSE ".4byte 0x00501013\n\t" -#define MIPS_EHB ".4byte 0x00301013\n\t" -#define MIPS_IHB ".4byte 0x00101013\n\t" +#define MIPS_PAUSE ASM_INSN_I("0x00501013\n\t") +#define MIPS_EHB ASM_INSN_I("0x00301013\n\t") +#define MIPS_IHB ASM_INSN_I("0x00101013\n\t") #endif // _ASM_RISCV_VENDOR_EXTENSIONS_MIPS_H From e3966940559d52aa1800a008dcfeec218dd31f88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 24 Oct 2025 12:58:53 +0000 Subject: [PATCH 0444/1075] tools: ynl: avoid print_field when there is no reply When request a none support device operation, there will be no reply. In this case, the len(desc) check will always be true, causing print_field to enter an infinite loop and crash the program. Example reproducer: # ethtool.py -c veth0 To fix this, return immediately if there is no reply. Fixes: f3d07b02b2b8 ("tools: ynl: ethtool testing tool") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251024125853.102916-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ethtool.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index 9b523cbb3568..fd0f6b8d54d1 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -44,6 +44,9 @@ def print_field(reply, *desc): Pretty-print a set of fields from the reply. desc specifies the fields and the optional type (bool/yn). """ + if not reply: + return + if len(desc) == 0: return print_field(reply, *zip(reply.keys(), reply.keys())) From 520ad9e96937e825a117e9f00dd35a3e199d67b5 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 20:55:12 +0200 Subject: [PATCH 0445/1075] dpll: spec: add missing module-name and clock-id to pin-get reply The dpll.yaml spec incorrectly omitted module-name and clock-id from the pin-get operation reply specification, even though the kernel DPLL implementation has always included these attributes in pin-get responses since the initial implementation. This spec inconsistency caused issues with the C YNL code generator. The generated dpll_pin_get_rsp structure was missing these fields. Fix the spec by adding module-name and clock-id to the pin-attrs reply specification to match the actual kernel behavior. Fixes: 3badff3a25d8 ("dpll: spec: Add Netlink spec in YAML") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024185512.363376-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index cafb4ec20447..80728f6f9bc8 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -605,6 +605,8 @@ operations: reply: &pin-attrs attributes: - id + - module-name + - clock-id - board-label - panel-label - package-label From 210b35d6a7ea415494ce75490c4b43b4e717d935 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Oct 2025 11:17:42 +0100 Subject: [PATCH 0446/1075] dt-bindings: net: sparx5: Narrow properly LAN969x register space windows Commit 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") said that LAN969x has exactly two address spaces ("reg" property) but implemented it as 2 or more. Narrow the constraint to properly express that only two items are allowed, which also matches Linux driver. Fixes: 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251026101741.20507-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/microchip,sparx5-switch.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index 5caa3779660d..5491d0775ede 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -180,9 +180,9 @@ allOf: then: properties: reg: - minItems: 2 + maxItems: 2 reg-names: - minItems: 2 + maxItems: 2 else: properties: reg: From 5228ed2c624449d1a53d1c84ba01c021d2df95c0 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sat, 11 Oct 2025 02:47:47 +0200 Subject: [PATCH 0447/1075] riscv: KGDB: Replace deprecated strcpy in kgdb_arch_handle_qxfer_pkt strcpy() is deprecated because it can cause a buffer overflow when the sizes of the source and the destination are not known at compile time. Use strscpy() instead. Link: https://github.com/KSPP/linux/issues/88 Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20251011004750.461954-1-thorsten.blum@linux.dev Signed-off-by: Paul Walmsley --- arch/riscv/kernel/kgdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c index 9f3db3503dab..15fec5d1e6de 100644 --- a/arch/riscv/kernel/kgdb.c +++ b/arch/riscv/kernel/kgdb.c @@ -265,10 +265,10 @@ void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, { if (!strncmp(remcom_in_buffer, gdb_xfer_read_target, sizeof(gdb_xfer_read_target))) - strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc); + strscpy(remcom_out_buffer, riscv_gdb_stub_target_desc, BUFMAX); else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml, sizeof(gdb_xfer_read_cpuxml))) - strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml); + strscpy(remcom_out_buffer, riscv_gdb_stub_cpuxml, BUFMAX); } static inline void kgdb_arch_update_addr(struct pt_regs *regs, From 2e448567839c65768486d56612c88cb327d26050 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 21 Oct 2025 15:51:54 +0200 Subject: [PATCH 0448/1075] cpuidle: riscv-sbi: Replace deprecated strcpy in sbi_cpuidle_init_cpu strcpy() is deprecated; use strscpy() instead. Link: https://github.com/KSPP/linux/issues/88 Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20251021135155.1409-2-thorsten.blum@linux.dev Signed-off-by: Paul Walmsley --- drivers/cpuidle/cpuidle-riscv-sbi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-riscv-sbi.c b/drivers/cpuidle/cpuidle-riscv-sbi.c index a360bc4d20b7..19be6475d356 100644 --- a/drivers/cpuidle/cpuidle-riscv-sbi.c +++ b/drivers/cpuidle/cpuidle-riscv-sbi.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -303,8 +304,8 @@ static int sbi_cpuidle_init_cpu(struct device *dev, int cpu) drv->states[0].exit_latency = 1; drv->states[0].target_residency = 1; drv->states[0].power_usage = UINT_MAX; - strcpy(drv->states[0].name, "WFI"); - strcpy(drv->states[0].desc, "RISC-V WFI"); + strscpy(drv->states[0].name, "WFI"); + strscpy(drv->states[0].desc, "RISC-V WFI"); /* * If no DT idle states are detected (ret == 0) let the driver From c8b8804760eb0c4c0c7c2b500380ab3fa9f92b5a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 28 Oct 2025 10:20:30 +0000 Subject: [PATCH 0449/1075] ASoC: Fix build for sdw_utils Revert 3293d3d7b0 ("ASoC: sdw_utils: add name_prefix for rt1321 part id") due to dependencies on -next which for some reason don't show up in my builds. Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index d717d4143932..270c66b90228 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -313,7 +313,6 @@ struct asoc_sdw_codec_info codec_info_list[] = { }, { .part_id = 0x1321, - .name_prefix = "rt1320", .dais = { { .direction = {true, false}, From b2dd1d0d322dce5f331961c927e775b84014d5ab Mon Sep 17 00:00:00 2001 From: Maarten Zanders Date: Fri, 24 Oct 2025 15:57:15 +0200 Subject: [PATCH 0450/1075] ASoC: fsl_sai: Fix sync error in consumer mode When configured for default synchronisation (Rx syncs to Tx) and the SAI operates in consumer mode (clocks provided externally to Tx), a synchronisation error occurs on Tx on the first attempt after device initialisation when the playback stream is started while a capture stream is already active. This results in channel shift/swap on the playback stream. Subsequent streams (ie after that first failing one) always work correctly, no matter the order, with or without the other stream active. This issue was observed (and fix tested) on an i.MX6UL board connected to an ADAU1761 codec, where the codec provides both frame and bit clock (connected to TX pins). To fix this, always initialize the 'other' xCR4 and xCR5 registers when we're starting a stream which is synced to the opposite one, irregardless of the producer/consumer status. Fixes: 51659ca069ce ("ASoC: fsl-sai: set xCR4/xCR5/xMR for SAI master mode") Signed-off-by: Maarten Zanders Reviewed-by: Shengjiu Wang Link: https://patch.msgid.link/20251024135716.584265-1-maarten@zanders.be Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_sai.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index 65093325a6b6..72bfc91e21b9 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -652,12 +652,12 @@ static int fsl_sai_hw_params(struct snd_pcm_substream *substream, val_cr4 |= FSL_SAI_CR4_CHMOD; /* - * For SAI provider mode, when Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will - * generate bclk and frame clock for Tx(Rx), we should set RCR4(TCR4), - * RCR5(TCR5) for playback(capture), or there will be sync error. + * When Tx(Rx) sync with Rx(Tx) clock, Rx(Tx) will provide bclk and + * frame clock for Tx(Rx). We should set RCR4(TCR4), RCR5(TCR5) + * for playback(capture), or there will be sync error. */ - if (!sai->is_consumer_mode[tx] && fsl_sai_dir_is_synced(sai, adir)) { + if (fsl_sai_dir_is_synced(sai, adir)) { regmap_update_bits(sai->regmap, FSL_SAI_xCR4(!tx, ofs), FSL_SAI_CR4_SYWD_MASK | FSL_SAI_CR4_FRSZ_MASK | FSL_SAI_CR4_CHMOD_MASK, From 45f5c9eec43a9bf448f46562f146810831916cc9 Mon Sep 17 00:00:00 2001 From: Bard Liao Date: Mon, 27 Oct 2025 22:00:12 +0800 Subject: [PATCH 0451/1075] ASoC: soc_sdw_utils: remove cs42l43 component_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "spk:cs42l43-spk" component string will be added conditionally by asoc_sdw_cs42l43_spk_rtd_init(). We should not add "spk:cs42l43" unconditionally. Fixes: c61da55412a0 ("ASoC: sdw_utils: Add missed component_name strings for speaker amps") Signed-off-by: Bard Liao Reviewed-by: Péter Ujfalusi Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20251027140012.966306-1-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 270c66b90228..f7c8c16308de 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -638,7 +638,6 @@ struct asoc_sdw_codec_info codec_info_list[] = { { .direction = {true, false}, .dai_name = "cs42l43-dp6", - .component_name = "cs42l43", .dai_type = SOC_SDW_DAI_TYPE_AMP, .dailink = {SOC_SDW_AMP_OUT_DAI_ID, SOC_SDW_UNUSED_DAI_ID}, .init = asoc_sdw_cs42l43_spk_init, From 607b9fb2ce248cc5b633c5949e0153838992c152 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Mon, 20 Oct 2025 11:13:55 +0200 Subject: [PATCH 0452/1075] x86/CPU/AMD: Add RDSEED fix for Zen5 There's an issue with RDSEED's 16-bit and 32-bit register output variants on Zen5 which return a random value of 0 "at a rate inconsistent with randomness while incorrectly signaling success (CF=1)". Search the web for AMD-SB-7055 for more detail. Add a fix glue which checks microcode revisions. [ bp: Add microcode revisions checking, rewrite. ] Cc: stable@vger.kernel.org Signed-off-by: Gregory Price Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20251018024010.4112396-1-gourry@gourry.net --- arch/x86/kernel/cpu/amd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ccaa51ce63f6..bc29be670a2a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1035,8 +1035,18 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) } } +static const struct x86_cpu_id zen5_rdseed_microcode[] = { + ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), +}; + static void init_amd_zen5(struct cpuinfo_x86 *c) { + if (!x86_match_min_microcode_rev(zen5_rdseed_microcode)) { + clear_cpu_cap(c, X86_FEATURE_RDSEED); + msr_clear_bit(MSR_AMD64_CPUID_FN_7, 18); + pr_emerg_once("RDSEED32 is broken. Disabling the corresponding CPUID bit.\n"); + } } static void init_amd(struct cpuinfo_x86 *c) From 9e8b3201c7302d5b522ba3535630bed21cc03c27 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Wed, 15 Oct 2025 16:01:28 +0200 Subject: [PATCH 0453/1075] drm/sched: avoid killing parent entity on child SIGKILL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DRM scheduler tracks who last uses an entity and when that process is killed blocks all further submissions to that entity. The problem is that we didn't track who initially created an entity, so when a process accidently leaked its file descriptor to a child and that child got killed, we killed the parent's entities. Avoid that and instead initialize the entities last user on entity creation. This also allows to drop the extra NULL check. Signed-off-by: David Rosca Signed-off-by: Christian König Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4568 Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Acked-by: Philipp Stanner Link: https://lore.kernel.org/r/20251015140128.1470-1-christian.koenig@amd.com Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251015140128.1470-1-christian.koenig@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index aa222166de58..c8e949f4a568 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -70,6 +70,7 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, entity->guilty = guilty; entity->num_sched_list = num_sched_list; entity->priority = priority; + entity->last_user = current->group_leader; /* * It's perfectly valid to initialize an entity without having a valid * scheduler attached. It's just not valid to use the scheduler before it @@ -302,7 +303,7 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) /* For a killed process disallow further enqueueing of jobs. */ last_user = cmpxchg(&entity->last_user, current->group_leader, NULL); - if ((!last_user || last_user == current->group_leader) && + if (last_user == current->group_leader && (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) drm_sched_entity_kill(entity); From 5c76f9961c170552c1d07c830b5e145475151600 Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Mon, 27 Oct 2025 18:29:19 -0300 Subject: [PATCH 0454/1075] smb: client: fix potential cfid UAF in smb2_query_info_compound When smb2_query_info_compound() retries, a previously allocated cfid may have been freed in the first attempt. Because cfid wasn't reset on replay, later cleanup could act on a stale pointer, leading to a potential use-after-free. Reinitialize cfid to NULL under the replay label. Example trace (trimmed): refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 11224 at ../lib/refcount.c:28 refcount_warn_saturate+0x9c/0x110 [...] RIP: 0010:refcount_warn_saturate+0x9c/0x110 [...] Call Trace: smb2_query_info_compound+0x29c/0x5c0 [cifs f90b72658819bd21c94769b6a652029a07a7172f] ? step_into+0x10d/0x690 ? __legitimize_path+0x28/0x60 smb2_queryfs+0x6a/0xf0 [cifs f90b72658819bd21c94769b6a652029a07a7172f] smb311_queryfs+0x12d/0x140 [cifs f90b72658819bd21c94769b6a652029a07a7172f] ? kmem_cache_alloc+0x18a/0x340 ? getname_flags+0x46/0x1e0 cifs_statfs+0x9f/0x2b0 [cifs f90b72658819bd21c94769b6a652029a07a7172f] statfs_by_dentry+0x67/0x90 vfs_statfs+0x16/0xd0 user_statfs+0x54/0xa0 __do_sys_statfs+0x20/0x50 do_syscall_64+0x58/0x80 Cc: stable@kernel.org Fixes: 4f1fffa237692 ("cifs: commands that are retried should have replay flag set") Reviewed-by: Paulo Alcantara (Red Hat) Acked-by: Shyam Prasad N Reviewed-by: Enzo Matsumiya Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 0f9130ef2e7d..1e39f2165e42 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2799,11 +2799,12 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid fid; int rc; __le16 *utf16_path; - struct cached_fid *cfid = NULL; + struct cached_fid *cfid; int retries = 0, cur_sleep = 1; replay_again: /* reinitialize for possible replay */ + cfid = NULL; flags = CIFS_CP_CREATE_CLOSE_OP; oplock = SMB2_OPLOCK_LEVEL_NONE; server = cifs_pick_channel(ses); From 12a1c9353c47c0fb3464eba2d78cdf649dee1cf7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:32 +0900 Subject: [PATCH 0455/1075] block: fix op_is_zone_mgmt() to handle REQ_OP_ZONE_RESET_ALL REQ_OP_ZONE_RESET_ALL is a zone management request. Fix op_is_zone_mgmt() to return true for that operation, like it already does for REQ_OP_ZONE_RESET. While no problems were reported without this fix, this change allows strengthening checks in various block device drivers (scsi sd, virtioblk, DM) where op_is_zone_mgmt() is used to verify that a zone management command is not being issued to a regular block device. Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 8e8d1cc8b06c..d8ba743a89b7 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -478,6 +478,7 @@ static inline bool op_is_zone_mgmt(enum req_op op) { switch (op & REQ_OP_MASK) { case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_OPEN: case REQ_OP_ZONE_CLOSE: case REQ_OP_ZONE_FINISH: From 19de03b312d69a7e9bacb51c806c6e3f4207376c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 27 Oct 2025 09:27:33 +0900 Subject: [PATCH 0456/1075] block: make REQ_OP_ZONE_OPEN a write operation A REQ_OP_OPEN_ZONE request changes the condition of a sequential zone of a zoned block device to the explicitly open condition (BLK_ZONE_COND_EXP_OPEN). As such, it should be considered a write operation. Change this operation code to be an odd number to reflect this. The following operation numbers are changed to keep the numbering compact. No problems were reported without this change as this operation has no data. However, this unifies the zone operation to reflect that they modify the device state and also allows strengthening checks in the block layer, e.g. checking if this operation is not issued against a read-only device. Fixes: 6c1b1da58f8c ("block: add zone open, close and finish operations") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d8ba743a89b7..44c30183ecc3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -341,15 +341,15 @@ enum req_op { /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = (__force blk_opf_t)9, /* Open a zone */ - REQ_OP_ZONE_OPEN = (__force blk_opf_t)10, + REQ_OP_ZONE_OPEN = (__force blk_opf_t)11, /* Close a zone */ - REQ_OP_ZONE_CLOSE = (__force blk_opf_t)11, + REQ_OP_ZONE_CLOSE = (__force blk_opf_t)13, /* Transition a zone to full */ - REQ_OP_ZONE_FINISH = (__force blk_opf_t)13, + REQ_OP_ZONE_FINISH = (__force blk_opf_t)15, /* reset a zone write pointer */ - REQ_OP_ZONE_RESET = (__force blk_opf_t)15, + REQ_OP_ZONE_RESET = (__force blk_opf_t)17, /* reset all the zone present on the device */ - REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)17, + REQ_OP_ZONE_RESET_ALL = (__force blk_opf_t)19, /* Driver private requests */ REQ_OP_DRV_IN = (__force blk_opf_t)34, From 7ceba45a6658ce637da334cd0ebf27f4ede6c0fe Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:37 +0200 Subject: [PATCH 0457/1075] wifi: cfg80211: add an hrtimer based delayed work item The normal timer mechanism assume that timeout further in the future need a lower accuracy. As an example, the granularity for a timer scheduled 4096 ms in the future on a 1000 Hz system is already 512 ms. This granularity is perfectly sufficient for e.g. timeouts, but there are other types of events that will happen at a future point in time and require a higher accuracy. Add a new wiphy_hrtimer_work type that uses an hrtimer internally. The API is almost identical to the existing wiphy_delayed_work and it can be used as a drop-in replacement after minor adjustments. The work will be scheduled relative to the current time with a slack of 1 millisecond. CC: stable@vger.kernel.org # 6.4+ Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.7f13a2adc5eb.I01b5af0363869864b0580d9c2a1770bafab69566@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 78 ++++++++++++++++++++++++++++++++++++++++++ net/wireless/core.c | 56 ++++++++++++++++++++++++++++++ net/wireless/trace.h | 21 ++++++++++++ 3 files changed, 155 insertions(+) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 781624f5913a..820e299f06b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6435,6 +6435,11 @@ static inline void wiphy_delayed_work_init(struct wiphy_delayed_work *dwork, * after wiphy_lock() was called. Therefore, wiphy_cancel_work() can * use just cancel_work() instead of cancel_work_sync(), it requires * being in a section protected by wiphy_lock(). + * + * Note that these are scheduled with a timer where the accuracy + * becomes less the longer in the future the scheduled timer is. Use + * wiphy_hrtimer_work_queue() if the timer must be not be late by more + * than approximately 10 percent. */ void wiphy_delayed_work_queue(struct wiphy *wiphy, struct wiphy_delayed_work *dwork, @@ -6506,6 +6511,79 @@ void wiphy_delayed_work_flush(struct wiphy *wiphy, bool wiphy_delayed_work_pending(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +struct wiphy_hrtimer_work { + struct wiphy_work work; + struct wiphy *wiphy; + struct hrtimer timer; +}; + +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t); + +static inline void wiphy_hrtimer_work_init(struct wiphy_hrtimer_work *hrwork, + wiphy_work_func_t func) +{ + hrtimer_setup(&hrwork->timer, wiphy_hrtimer_work_timer, + CLOCK_BOOTTIME, HRTIMER_MODE_REL); + wiphy_work_init(&hrwork->work, func); +} + +/** + * wiphy_hrtimer_work_queue - queue hrtimer work for the wiphy + * @wiphy: the wiphy to queue for + * @hrwork: the high resolution timer worker + * @delay: the delay given as a ktime_t + * + * Please refer to wiphy_delayed_work_queue(). The difference is that + * the hrtimer work uses a high resolution timer for scheduling. This + * may be needed if timeouts might be scheduled further in the future + * and the accuracy of the normal timer is not sufficient. + * + * Expect a delay of a few milliseconds as the timer is scheduled + * with some slack and some more time may pass between queueing the + * work and its start. + */ +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay); + +/** + * wiphy_hrtimer_work_cancel - cancel previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrtimer: the hrtimer work to cancel + * + * Cancel the work *without* waiting for it, this assumes being + * called under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrtimer); + +/** + * wiphy_hrtimer_work_flush - flush previously queued hrtimer work + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work to flush + * + * Flush the work (i.e. run it if pending). This must be called + * under the wiphy mutex acquired by wiphy_lock(). + */ +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + +/** + * wiphy_hrtimer_work_pending - Find out whether a wiphy hrtimer + * work item is currently pending. + * + * @wiphy: the wiphy, for debug purposes + * @hrwork: the hrtimer work in question + * + * Return: true if timer is pending, false otherwise + * + * Please refer to the wiphy_delayed_work_pending() documentation as + * this is the equivalent function for hrtimer based delayed work + * items. + */ +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork); + /** * enum ieee80211_ap_reg_power - regulatory power for an Access Point * diff --git a/net/wireless/core.c b/net/wireless/core.c index 797f9f2004a6..54a34d8d356e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1787,6 +1787,62 @@ bool wiphy_delayed_work_pending(struct wiphy *wiphy, } EXPORT_SYMBOL_GPL(wiphy_delayed_work_pending); +enum hrtimer_restart wiphy_hrtimer_work_timer(struct hrtimer *t) +{ + struct wiphy_hrtimer_work *hrwork = + container_of(t, struct wiphy_hrtimer_work, timer); + + wiphy_work_queue(hrwork->wiphy, &hrwork->work); + + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_timer); + +void wiphy_hrtimer_work_queue(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork, + ktime_t delay) +{ + trace_wiphy_hrtimer_work_queue(wiphy, &hrwork->work, delay); + + if (!delay) { + hrtimer_cancel(&hrwork->timer); + wiphy_work_queue(wiphy, &hrwork->work); + return; + } + + hrwork->wiphy = wiphy; + hrtimer_start_range_ns(&hrwork->timer, delay, + 1000 * NSEC_PER_USEC, HRTIMER_MODE_REL); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_queue); + +void wiphy_hrtimer_work_cancel(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_cancel(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_cancel); + +void wiphy_hrtimer_work_flush(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + lockdep_assert_held(&wiphy->mtx); + + hrtimer_cancel(&hrwork->timer); + wiphy_work_flush(wiphy, &hrwork->work); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_flush); + +bool wiphy_hrtimer_work_pending(struct wiphy *wiphy, + struct wiphy_hrtimer_work *hrwork) +{ + return hrtimer_is_queued(&hrwork->timer); +} +EXPORT_SYMBOL_GPL(wiphy_hrtimer_work_pending); + static int __init cfg80211_init(void) { int err; diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8a4c34112eb5..2b71f1d867a0 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -304,6 +304,27 @@ TRACE_EVENT(wiphy_delayed_work_queue, __entry->delay) ); +TRACE_EVENT(wiphy_hrtimer_work_queue, + TP_PROTO(struct wiphy *wiphy, struct wiphy_work *work, + ktime_t delay), + TP_ARGS(wiphy, work, delay), + TP_STRUCT__entry( + WIPHY_ENTRY + __field(void *, instance) + __field(void *, func) + __field(ktime_t, delay) + ), + TP_fast_assign( + WIPHY_ASSIGN; + __entry->instance = work; + __entry->func = work->func; + __entry->delay = delay; + ), + TP_printk(WIPHY_PR_FMT " instance=%p func=%pS delay=%llu", + WIPHY_PR_ARG, __entry->instance, __entry->func, + __entry->delay) +); + TRACE_EVENT(wiphy_work_worker_start, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy), From dfa865d490b1bd252045463588a91a4d3c82f3c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:38 +0200 Subject: [PATCH 0458/1075] wifi: mac80211: use wiphy_hrtimer_work for ttlm_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. CC: stable@vger.kernel.org Fixes: 702e80470a33 ("wifi: mac80211: support handling of advertised TID-to-link mapping") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.83c2c611545e.I35498a6d883ea24b0dc4910cf521aa768d2a0e90@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 73fd86ec1bce..eb22279c6e01 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -616,7 +616,7 @@ struct ieee80211_if_managed { u16 removed_links; /* TID-to-link mapping support */ - struct wiphy_delayed_work ttlm_work; + struct wiphy_hrtimer_work ttlm_work; struct ieee80211_adv_ttlm_info ttlm_info; struct wiphy_work teardown_ttlm_work; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 3b5827ea438e..623a46b3214e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -45,7 +45,7 @@ #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 -#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100) +#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS (100 * USEC_PER_MSEC) #define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00 #define IEEE80211_NEG_TTLM_REQ_TIMEOUT (HZ / 5) @@ -4242,7 +4242,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(&sdata->u.mgd.ttlm_info, 0, sizeof(sdata->u.mgd.ttlm_info)); - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &ifmgd->ttlm_work); memset(&sdata->vif.neg_ttlm, 0, sizeof(sdata->vif.neg_ttlm)); wiphy_delayed_work_cancel(sdata->local->hw.wiphy, @@ -7095,7 +7095,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, /* if a planned TID-to-link mapping was cancelled - * abort it */ - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); } else if (sdata->u.mgd.ttlm_info.active) { /* if no TID-to-link element, set to default mapping in @@ -7130,7 +7130,7 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (ttlm_info.switch_time) { u16 beacon_ts_tu, st_tu, delay; - u32 delay_jiffies; + u64 delay_usec; u64 mask; /* The t2l map switch time is indicated with a partial @@ -7152,23 +7152,23 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata, if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW) return; - delay_jiffies = TU_TO_JIFFIES(delay); + delay_usec = ieee80211_tu_to_usec(delay); /* Link switching can take time, so schedule it * 100ms before to be ready on time */ - if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) - delay_jiffies -= + if (delay_usec > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS) + delay_usec -= IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS; else - delay_jiffies = 0; + delay_usec = 0; sdata->u.mgd.ttlm_info = ttlm_info; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work); - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ttlm_work, - delay_jiffies); + us_to_ktime(delay_usec)); return; } } @@ -8802,7 +8802,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) timer_setup(&ifmgd->conn_mon_timer, ieee80211_sta_conn_mon_timer, 0); wiphy_delayed_work_init(&ifmgd->tx_tspec_wk, ieee80211_sta_handle_tspec_ac_params_wk); - wiphy_delayed_work_init(&ifmgd->ttlm_work, + wiphy_hrtimer_work_init(&ifmgd->ttlm_work, ieee80211_tid_to_link_map_work); wiphy_delayed_work_init(&ifmgd->neg_ttlm_timeout_work, ieee80211_neg_ttlm_timeout_work); From 3f654d53dff565095d83a84e3b6187526dadf4c8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:39 +0200 Subject: [PATCH 0459/1075] wifi: mac80211: use wiphy_hrtimer_work for ml_reconf_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. CC: stable@vger.kernel.org Fixes: 8eb8dd2ffbbb ("wifi: mac80211: Support link removal using Reconfiguration ML element") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.24a7b54e9e37.I063c5c15bf7672f94cea75f83e486a3ca52d098f@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mlme.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb22279c6e01..eb38049b2252 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -612,7 +612,7 @@ struct ieee80211_if_managed { u8 *assoc_req_ies; size_t assoc_req_ies_len; - struct wiphy_delayed_work ml_reconf_work; + struct wiphy_hrtimer_work ml_reconf_work; u16 removed_links; /* TID-to-link mapping support */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 623a46b3214e..f95bcf84ecc2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4249,7 +4249,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, &ifmgd->neg_ttlm_timeout_work); sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); wiphy_work_cancel(sdata->local->hw.wiphy, @@ -6876,7 +6876,7 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, /* In case the removal was cancelled, abort it */ if (sdata->u.mgd.removed_links) { sdata->u.mgd.removed_links = 0; - wiphy_delayed_work_cancel(sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work); } return; @@ -6906,9 +6906,9 @@ static void ieee80211_ml_reconfiguration(struct ieee80211_sub_if_data *sdata, } sdata->u.mgd.removed_links = removed_links; - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &sdata->u.mgd.ml_reconf_work, - TU_TO_JIFFIES(delay)); + us_to_ktime(ieee80211_tu_to_usec(delay))); } static int ieee80211_ttlm_set_links(struct ieee80211_sub_if_data *sdata, @@ -8793,7 +8793,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_csa_connection_drop_work); wiphy_delayed_work_init(&ifmgd->tdls_peer_del_work, ieee80211_tdls_peer_del_work); - wiphy_delayed_work_init(&ifmgd->ml_reconf_work, + wiphy_hrtimer_work_init(&ifmgd->ml_reconf_work, ieee80211_ml_reconf_work); wiphy_delayed_work_init(&ifmgd->reconf.wk, ieee80211_ml_sta_reconf_timeout); From fbc1cc6973099f45e4c30b86f12b4435c7cb7d24 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Tue, 28 Oct 2025 12:58:40 +0200 Subject: [PATCH 0460/1075] wifi: mac80211: use wiphy_hrtimer_work for csa.switch_work The work item may be scheduled relatively far in the future. As the event happens at a specific point in time, the normal timer accuracy is not sufficient in that case. Switch to use wiphy_hrtimer_work so that the accuracy is sufficient. To make this work, use the same clock to store the timestamp. CC: stable@vger.kernel.org Fixes: ec3252bff7b6 ("wifi: mac80211: use wiphy work for channel switch") Signed-off-by: Benjamin Berg Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251028125710.68258c7e4ac4.I4ff2b2cdffbbf858bf5f08baccc7a88c4f9efe6f@changeid Signed-off-by: Johannes Berg --- net/mac80211/chan.c | 2 +- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/link.c | 4 ++-- net/mac80211/mlme.c | 18 +++++++++--------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 57065714cf8c..7f8799fd673e 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1290,7 +1290,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) &link->csa.finalize_work); break; case NL80211_IFTYPE_STATION: - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); break; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb38049b2252..878c3b14aeb8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1017,10 +1017,10 @@ struct ieee80211_link_data_managed { bool operating_11g_mode; struct { - struct wiphy_delayed_work switch_work; + struct wiphy_hrtimer_work switch_work; struct cfg80211_chan_def ap_chandef; struct ieee80211_parsed_tpe tpe; - unsigned long time; + ktime_t time; bool waiting_bcn; bool ignored_same_chan; bool blocked_tx; diff --git a/net/mac80211/link.c b/net/mac80211/link.c index d71eabe5abf8..4a19b765ccb6 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -472,10 +472,10 @@ static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, * from there. */ if (link->conf->csa_active) - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - - jiffies); + ktime_get_boottime()); } for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f95bcf84ecc2..f3138d158535 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2594,7 +2594,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success, return; } - wiphy_delayed_work_queue(sdata->local->hw.wiphy, + wiphy_hrtimer_work_queue(sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work, 0); } @@ -2753,7 +2753,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, .timestamp = timestamp, .device_timestamp = device_timestamp, }; - unsigned long now; + u32 csa_time_tu; + ktime_t now; int res; lockdep_assert_wiphy(local->hw.wiphy); @@ -2983,10 +2984,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, csa_ie.mode); /* we may have to handle timeout for deactivated link in software */ - now = jiffies; - link->u.mgd.csa.time = now + - TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) * - link->conf->beacon_int); + now = ktime_get_boottime(); + csa_time_tu = (max_t(int, csa_ie.count, 1) - 1) * link->conf->beacon_int; + link->u.mgd.csa.time = now + us_to_ktime(ieee80211_tu_to_usec(csa_time_tu)); if (ieee80211_vif_link_active(&sdata->vif, link->link_id) && local->ops->channel_switch) { @@ -3001,7 +3001,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link, } /* channel switch handled in software */ - wiphy_delayed_work_queue(local->hw.wiphy, + wiphy_hrtimer_work_queue(local->hw.wiphy, &link->u.mgd.csa.switch_work, link->u.mgd.csa.time - now); return; @@ -8849,7 +8849,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) else link->u.mgd.req_smps = IEEE80211_SMPS_OFF; - wiphy_delayed_work_init(&link->u.mgd.csa.switch_work, + wiphy_hrtimer_work_init(&link->u.mgd.csa.switch_work, ieee80211_csa_switch_work); ieee80211_clear_tpe(&link->conf->tpe); @@ -10064,7 +10064,7 @@ void ieee80211_mgd_stop_link(struct ieee80211_link_data *link) &link->u.mgd.request_smps_work); wiphy_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.recalc_smps); - wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy, + wiphy_hrtimer_work_cancel(link->sdata->local->hw.wiphy, &link->u.mgd.csa.switch_work); } From 926d002e6d7e2f1fd5c1b53cf6208153ee7d380d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 6 Oct 2025 11:39:37 +0200 Subject: [PATCH 0461/1075] drm/mediatek: Fix device use-after-free on unbind A recent change fixed device reference leaks when looking up drm platform device driver data during bind() but failed to remove a partial fix which had been added by commit 80805b62ea5b ("drm/mediatek: Fix kobject put for component sub-drivers"). This results in a reference imbalance on component bind() failures and on unbind() which could lead to a user-after-free. Make sure to only drop the references after retrieving the driver data by effectively reverting the previous partial fix. Note that holding a reference to a device does not prevent its driver data from going away so there is no point in keeping the reference. Fixes: 1f403699c40f ("drm/mediatek: Fix device/node reference count leaks in mtk_drm_get_all_drm_priv") Reported-by: Sjoerd Simons Closes: https://lore.kernel.org/r/20251003-mtk-drm-refcount-v1-1-3b3f2813b0db@collabora.com Cc: stable@vger.kernel.org Cc: Ma Ke Cc: AngeloGioacchino Del Regno Signed-off-by: Johan Hovold Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Sjoerd Simons Tested-by: Sjoerd Simons Tested-by: Ritesh Raj Sarraf Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20251006093937.27869-1-johan@kernel.org/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_drv.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index eb5537f0ac90..31ff2922758a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -686,10 +686,6 @@ static int mtk_drm_bind(struct device *dev) for (i = 0; i < private->data->mmsys_dev_num; i++) private->all_drm_private[i]->drm = NULL; err_put_dev: - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); return ret; } @@ -697,18 +693,12 @@ static int mtk_drm_bind(struct device *dev) static void mtk_drm_unbind(struct device *dev) { struct mtk_drm_private *private = dev_get_drvdata(dev); - int i; /* for multi mmsys dev, unregister drm dev in mmsys master */ if (private->drm_master) { drm_dev_unregister(private->drm); mtk_drm_kms_deinit(private->drm); drm_dev_put(private->drm); - - for (i = 0; i < private->data->mmsys_dev_num; i++) { - /* For device_find_child in mtk_drm_get_all_priv() */ - put_device(private->all_drm_private[i]->dev); - } put_device(private->mutex_dev); } private->mtk_drm_bound = false; From ba10f8d92a2c026b1052b4c0fa2cd7538838c965 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 16 Oct 2025 13:55:27 -0500 Subject: [PATCH 0462/1075] drm/amd: Check that VPE has reached DPM0 in idle handler [Why] Newer VPE microcode has functionality that will decrease DPM level only when a workload has run for 2 or more seconds. If VPE is turned off before this DPM decrease and the PMFW doesn't reset it when power gating VPE, the SOC can get stuck with a higher DPM level. This can happen from amdgpu's ring buffer test because it's a short quick workload for VPE and VPE is turned off after 1s. [How] In idle handler besides checking fences are drained check PMFW version to determine if it will reset DPM when power gating VPE. If PMFW will not do this, then check VPE DPM level. If it is not DPM0 reschedule delayed work again until it is. v2: squash in return fix (Alex) Cc: Peyton.Lee@amd.com Reported-by: Sultan Alsawaf Reviewed-by: Sultan Alsawaf Tested-by: Sultan Alsawaf Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4615 Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 3ac635367eb589bee8edcc722f812a89970e14b7) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c | 34 ++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 474bfe36c0c2..aa78c2ee9e21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -322,6 +322,26 @@ static int vpe_early_init(struct amdgpu_ip_block *ip_block) return 0; } +static bool vpe_need_dpm0_at_power_down(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 1): + return adev->pm.fw_version < 0x0a640500; + default: + return false; + } +} + +static int vpe_get_dpm_level(struct amdgpu_device *adev) +{ + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + return 0; + + return RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_lv)); +} + static void vpe_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = @@ -329,11 +349,17 @@ static void vpe_idle_work_handler(struct work_struct *work) unsigned int fences = 0; fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + if (fences) + goto reschedule; - if (fences == 0) - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); - else - schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); + if (vpe_need_dpm0_at_power_down(adev) && vpe_get_dpm_level(adev) != 0) + goto reschedule; + + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return; + +reschedule: + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); } static int vpe_common_init(struct amdgpu_vpe *vpe) From 3328443363a0895fd9c096edfe8ecd372ca9145e Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sat, 18 Oct 2025 14:44:50 +0900 Subject: [PATCH 0463/1075] drm/radeon: Do not kfree() devres managed rdev Since the allocation of the drivers main structure was changed to devm_drm_dev_alloc() rdev is managed by devres and we shouldn't be calling kfree() on it. This fixes things exploding if the driver probe fails and devres cleans up the rdev after we already free'd it. Fixes: a9ed2f052c5c ("drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc") Signed-off-by: Daniel Palmer Signed-off-by: Alex Deucher (cherry picked from commit 16c0681617b8a045773d4d87b6140002fa75b03b) --- drivers/gpu/drm/radeon/radeon_kms.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 645e33bf7947..ba1446acd703 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -84,7 +84,6 @@ void radeon_driver_unload_kms(struct drm_device *dev) rdev->agp = NULL; done_free: - kfree(rdev); dev->dev_private = NULL; } From 745bae76acdd71709773c129a69deca01036250b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Sat, 18 Oct 2025 14:44:51 +0900 Subject: [PATCH 0464/1075] drm/radeon: Remove calls to drm_put_dev() Since the allocation of the drivers main structure was changed to devm_drm_dev_alloc() drm_put_dev()'ing to trigger it to be free'd should be done by devres. However, drm_put_dev() is still in the probe error and device remove paths. When the driver fails to probe warnings like the following are shown because devres is trying to drm_put_dev() after the driver already did it. [ 5.642230] radeon 0000:01:05.0: probe with driver radeon failed with error -22 [ 5.649605] ------------[ cut here ]------------ [ 5.649607] refcount_t: underflow; use-after-free. [ 5.649620] WARNING: CPU: 0 PID: 357 at lib/refcount.c:28 refcount_warn_saturate+0xbe/0x110 Fixes: a9ed2f052c5c ("drm/radeon: change drm_dev_alloc to devm_drm_dev_alloc") Signed-off-by: Daniel Palmer Signed-off-by: Alex Deucher (cherry picked from commit 3eb8c0b4c091da0a623ade0d3ee7aa4a93df1ea4) --- drivers/gpu/drm/radeon/radeon_drv.c | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 88e821d67af7..9c8907bc61d9 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -314,17 +314,17 @@ static int radeon_pci_probe(struct pci_dev *pdev, ret = pci_enable_device(pdev); if (ret) - goto err_free; + return ret; pci_set_drvdata(pdev, ddev); ret = radeon_driver_load_kms(ddev, flags); if (ret) - goto err_agp; + goto err; ret = drm_dev_register(ddev, flags); if (ret) - goto err_agp; + goto err; if (rdev->mc.real_vram_size <= (8 * 1024 * 1024)) format = drm_format_info(DRM_FORMAT_C8); @@ -337,30 +337,14 @@ static int radeon_pci_probe(struct pci_dev *pdev, return 0; -err_agp: +err: pci_disable_device(pdev); -err_free: - drm_dev_put(ddev); return ret; } -static void -radeon_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); - - drm_put_dev(dev); -} - static void radeon_pci_shutdown(struct pci_dev *pdev) { - /* if we are running in a VM, make sure the device - * torn down properly on reboot/shutdown - */ - if (radeon_device_is_virtual()) - radeon_pci_remove(pdev); - #if defined(CONFIG_PPC64) || defined(CONFIG_MACH_LOONGSON64) /* * Some adapters need to be suspended before a @@ -613,7 +597,6 @@ static struct pci_driver radeon_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, .probe = radeon_pci_probe, - .remove = radeon_pci_remove, .shutdown = radeon_pci_shutdown, .driver.pm = &radeon_pm_ops, }; From 5d7b36d1bffce8340b37cbba95ef743ed3adaefd Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 28 Mar 2025 10:34:57 +0800 Subject: [PATCH 0465/1075] drm/amd/display: pause the workload setting in dm v1: Pause the workload setting in dm when doinn idle optimization v2: Rebase patch to latest kernel code base (kernel 6.16) Reviewed-by: Alex Deucher Signed-off-by: Kenneth Feng Signed-off-by: Alex Deucher Signed-off-by: Yang Wang Signed-off-by: Alex Deucher (cherry picked from commit bc6d54ac7e7436721a19443265f971f890c13cc5) --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index 1ec9d03ad747..f08121a2b838 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -248,6 +248,8 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) struct vblank_control_work *vblank_work = container_of(work, struct vblank_control_work, work); struct amdgpu_display_manager *dm = vblank_work->dm; + struct amdgpu_device *adev = drm_to_adev(dm->ddev); + int r; mutex_lock(&dm->dc_lock); @@ -277,7 +279,16 @@ static void amdgpu_dm_crtc_vblank_control_worker(struct work_struct *work) if (dm->active_vblank_irq_count == 0) { dc_post_update_surfaces_to_stream(dm->dc); + + r = amdgpu_dpm_pause_power_profile(adev, true); + if (r) + dev_warn(adev->dev, "failed to set default power profile mode\n"); + dc_allow_idle_optimizations(dm->dc, true); + + r = amdgpu_dpm_pause_power_profile(adev, false); + if (r) + dev_warn(adev->dev, "failed to restore the power profile mode\n"); } mutex_unlock(&dm->dc_lock); From 382bd6a792836875da555fe9a2b51222b813fed1 Mon Sep 17 00:00:00 2001 From: Matthew Schwartz Date: Mon, 20 Oct 2025 16:09:34 -0700 Subject: [PATCH 0466/1075] drm/amd/display: Don't program BLNDGAM_MEM_PWR_FORCE when CM low-power is disabled on DCN30 Before commit 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`"), dpp3_program_blnd_lut(NULL) checked the low-power debug flag before calling dpp3_power_on_blnd_lut(false). After commit 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`"), dpp3_program_blnd_lut(NULL) unconditionally calls dpp3_power_on_blnd_lut(false). The BLNDGAM power helper writes BLNDGAM_MEM_PWR_FORCE when CM low-power is disabled, causing immediate SRAM power toggles instead of deferring at vupdate. This can disrupt atomic color/LUT sequencing during transitions between direct scanout and composition within gamescope's DRM backend on Steam Deck OLED. To fix this, leave the BLNDGAM power state unchanged when low-power is disabled, matching dpp3_power_on_hdr3dlut and dpp3_power_on_shaper. Fixes: 33056a97ae5e ("drm/amd/display: Remove double checks for `debug.enable_mem_low_power.bits.cm`") Signed-off-by: Matthew Schwartz Reviewed-by: Harry Wentland Reviewed-by: Mario Limonciello Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 13ff4f63fcddfc84ec8632f1443936b00aa26725) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c index 09be2a90cc79..4f569cd8a5d6 100644 --- a/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn30/dcn30_dpp.c @@ -578,9 +578,6 @@ static void dpp3_power_on_blnd_lut( dpp_base->ctx->dc->optimized_required = true; dpp_base->deferred_reg_writes.bits.disable_blnd_lut = true; } - } else { - REG_SET(CM_MEM_PWR_CTRL, 0, - BLNDGAM_MEM_PWR_FORCE, power_on == true ? 0 : 1); } } From 238d468d3ed18a324bb9d8c99f18c665dbac0511 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 22 Oct 2025 14:12:21 +0800 Subject: [PATCH 0467/1075] drm/amd/pm: fix smu table id bound check issue in smu_cmn_update_table() 'table_index' is a variable defined by the smu driver (kmd) 'table_id' is a variable defined by the hw smu (pmfw) This code should use table_index as a bounds check. Fixes: caad2613dc4bd ("drm/amd/powerplay: move table setting common code to smu_cmn.c") Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher (cherry picked from commit fca0c66b22303de0d1d6313059baf4dc960a4753) --- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index f532f7c69259..a8961a8f5c42 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -969,7 +969,7 @@ int smu_cmn_update_table(struct smu_context *smu, table_index); uint32_t table_size; int ret = 0; - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) + if (!table_data || table_index >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; table_size = smu_table->tables[table_index].size; From 07a13f913c291d6ec72ee4fc848d13ecfdc0e705 Mon Sep 17 00:00:00 2001 From: John Smith Date: Tue, 21 Oct 2025 11:08:13 +0200 Subject: [PATCH 0468/1075] drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Fiji Previously this was initialized with zero which represented PCIe Gen 1.0 instead of using the maximum value from the speed table which is the behaviour of all other smumgr implementations. Fixes: 18edef19ea44 ("drm/amd/powerplay: implement fw image related smu interface for Fiji.") Signed-off-by: John Smith Signed-off-by: Alex Deucher (cherry picked from commit c52238c9fb414555c68340cd80e487d982c1921c) --- drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c index d2dbd90bb427..0a876c840c79 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/fiji_smumgr.c @@ -2024,7 +2024,7 @@ static int fiji_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; /* 0:Gen1 1:Gen2 2:Gen3*/ + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; table->VRConfig = 0; From 501672e3c1576aa9a8364144213c77b98a31a42c Mon Sep 17 00:00:00 2001 From: John Smith Date: Tue, 21 Oct 2025 11:09:09 +0200 Subject: [PATCH 0469/1075] drm/amd/pm/powerplay/smumgr: Fix PCIeBootLinkLevel value on Iceland Previously this was initialized with zero which represented PCIe Gen 1.0 instead of using the maximum value from the speed table which is the behaviour of all other smumgr implementations. Fixes: 18aafc59b106 ("drm/amd/powerplay: implement fw related smu interface for iceland.") Signed-off-by: John Smith Signed-off-by: Alex Deucher (cherry picked from commit 92b0a6ae6672857ddeabf892223943d2f0e06c97) --- drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c index 1f50f1e74c48..aa3ae9b115c4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/iceland_smumgr.c @@ -2028,7 +2028,7 @@ static int iceland_init_smc_table(struct pp_hwmgr *hwmgr) table->VoltageResponseTime = 0; table->PhaseResponseTime = 0; table->MemoryThermThrottleEnable = 1; - table->PCIeBootLinkLevel = 0; + table->PCIeBootLinkLevel = (uint8_t) (data->dpm_table.pcie_speed_table.count); table->PCIeGenInterval = 1; result = iceland_populate_smc_svi2_config(hwmgr, table); From f3b37ebf2c94e3a3d7bbf5e3788ad86cf30fc7be Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:12:54 -0400 Subject: [PATCH 0470/1075] drm/amdgpu: fix SPDX headers on amdgpu_cper.c/h These should be MIT. The driver in general is MIT and the license text at the top of the files is MIT so fix it. Fixes: 92d5d2a09de1 ("drm/amdgpu: Introduce funcs for populating CPER") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit abd3f876404cafb107cb34bacb74706bfee11cbe) --- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c index ef996493115f..425a3e564360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2025 Advanced Micro Devices, Inc. * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h index bcb97d245673..353421807387 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * From 964f8ff276a54ad7fb09168141fb6a8d891d548a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:14:55 -0400 Subject: [PATCH 0471/1075] drm/amdgpu: fix SPDX header on amd_cper.h This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. Fixes: 523b69c65445 ("drm/amd/include: Add amd cper header") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 72c5482cb0f3d3c772c9de50e5a4265258a53f81) --- drivers/gpu/drm/amd/include/amd_cper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/amd_cper.h b/drivers/gpu/drm/amd/include/amd_cper.h index 086869264425..a252ee4c7874 100644 --- a/drivers/gpu/drm/amd/include/amd_cper.h +++ b/drivers/gpu/drm/amd/include/amd_cper.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2025 Advanced Micro Devices, Inc. * From 8284a9e91722d3214aac5d54b4e0d2c91af0fdfc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:17:37 -0400 Subject: [PATCH 0472/1075] drm/amdgpu: fix SPDX header on irqsrcs_vcn_5_0.h This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. Fixes: d1bb64651095 ("drm/amdgpu: add irq source ids for VCN5_0/JPEG5_0") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 68c20d7b1779f97d600e61b9e95726c0cd609e2a) --- drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h index 64b553e7de1a..e7fdcee22a71 100644 --- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: MIT */ /* * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. From 4f2cd64510e7ae0b3a6ec1c10826cb6baf04edfa Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 22 Oct 2025 09:19:55 -0400 Subject: [PATCH 0473/1075] drm/amdgpu: fix SPDX header on cyan_skillfish_reg_init.c This should be MIT. The driver in general is MIT and the license text at the top of the file is MIT so fix it. Fixes: e8529dbc75ca ("drm/amdgpu: add ip offset support for cyan skillfish") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4654 Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit 102c4f7c554ac5a5ecf0023fa0612beb58e3b0bd) --- drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c index 96616a865aac..ed1e25661706 100644 --- a/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/cyan_skillfish_reg_init.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* * Copyright 2018 Advanced Micro Devices, Inc. * From 7d08c3b1731014dd1cfd0bf8b0cb1cef9dfd191e Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Thu, 16 Oct 2025 20:08:10 -0600 Subject: [PATCH 0474/1075] drm/amd/display: Add HDR workaround for a specific eDP [WHY & HOW] Some eDP panels suffer from flicking when HDR is enabled in KDE or Gnome. This add another quirk to worksaround to skip VSC that is incompatible with an eDP panel. Link: https://gitlab.freedesktop.org/drm/amd/-/issues/4452 Reviewed-by: Aurabindo Pillai Signed-off-by: Alex Hung Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 99441824bec63549a076cd86631d138ec9a0c71c) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index fe100e4c9801..cc21337a182f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -83,6 +83,7 @@ static void apply_edid_quirks(struct drm_device *dev, struct edid *edid, struct edid_caps->panel_patch.remove_sink_ext_caps = true; break; case drm_edid_encode_panel_id('S', 'D', 'C', 0x4154): + case drm_edid_encode_panel_id('S', 'D', 'C', 0x4171): drm_dbg_driver(dev, "Disabling VSC on monitor with panel id %X\n", panel_id); edid_caps->panel_patch.disable_colorimetry = true; break; From b3656b355b5522cef1b52a7469010009c98156db Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Wed, 17 Sep 2025 11:00:02 -0400 Subject: [PATCH 0475/1075] drm/amd/display: Fix incorrect return of vblank enable on unconfigured crtc [Why&How] Return -EINVAL when userspace asks us to enable vblank on a crtc that is not yet enabled. Suggested-by: Aurabindo Pillai Reviewed-by: Aurabindo Pillai Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/1856 Signed-off-by: Ivan Lipski Signed-off-by: Wayne Lin Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit cb57b8cdb072dc37723b6906da1c37ff9cbc2da4) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c index f08121a2b838..38f9ea313dcb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c @@ -308,8 +308,12 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) int irq_type; int rc = 0; - if (acrtc->otg_inst == -1) - goto skip; + if (enable && !acrtc->base.enabled) { + drm_dbg_vbl(crtc->dev, + "Reject vblank enable on unconfigured CRTC %d (enabled=%d)\n", + acrtc->crtc_id, acrtc->base.enabled); + return -EINVAL; + } irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, acrtc->crtc_id); @@ -394,7 +398,7 @@ static inline int amdgpu_dm_crtc_set_vblank(struct drm_crtc *crtc, bool enable) return rc; } #endif -skip: + if (amdgpu_in_reset(adev)) return 0; From f0f7a3f542c1698edb69075f25a3f846207facba Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Tue, 28 Oct 2025 14:30:09 +0800 Subject: [PATCH 0476/1075] platform/x86: int3472: Fix double free of GPIO device during unregister MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit regulator_unregister() already frees the associated GPIO device. On ThinkPad X9 (Lunar Lake), this causes a double free issue that leads to random failures when other drivers (typically Intel THC) attempt to allocate interrupts. The root cause is that the reference count of the pinctrl_intel_platform module unexpectedly drops to zero when this driver defers its probe. This behavior can also be reproduced by unloading the module directly. Fix the issue by removing the redundant release of the GPIO device during regulator unregistration. Cc: stable@vger.kernel.org Fixes: 1e5d088a52c2 ("platform/x86: int3472: Stop using devm_gpiod_get()") Signed-off-by: Qiu Wenbo Reviewed-by: Andy Shevchenko Reviewed-by: Sakari Ailus Reviewed-by: Hans de Goede Reviewed-by: Daniel Scally Link: https://patch.msgid.link/20251028063009.289414-1-qiuwenbo@gnome.org Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/int3472/clk_and_regulator.c | 5 +---- include/linux/platform_data/x86/int3472.h | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index 476ec24d3702..9e052b164a1a 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -245,15 +245,12 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, if (IS_ERR(regulator->rdev)) return PTR_ERR(regulator->rdev); - int3472->regulators[int3472->n_regulator_gpios].ena_gpio = gpio; int3472->n_regulator_gpios++; return 0; } void skl_int3472_unregister_regulator(struct int3472_discrete_device *int3472) { - for (int i = 0; i < int3472->n_regulator_gpios; i++) { + for (int i = 0; i < int3472->n_regulator_gpios; i++) regulator_unregister(int3472->regulators[i].rdev); - gpiod_put(int3472->regulators[i].ena_gpio); - } } diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 1571e9157fa5..b1b837583d54 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -100,7 +100,6 @@ struct int3472_gpio_regulator { struct regulator_consumer_supply supply_map[GPIO_REGULATOR_SUPPLY_MAP_COUNT * 2]; char supply_name_upper[GPIO_SUPPLY_NAME_LENGTH]; char regulator_name[GPIO_REGULATOR_NAME_LENGTH]; - struct gpio_desc *ena_gpio; struct regulator_dev *rdev; struct regulator_desc rdesc; }; From 7f7d28c69eda3692bcf102b7096b93fd45c75b1d Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Oct 2025 10:49:59 +0200 Subject: [PATCH 0477/1075] MAINTAINERS: Update int3472 maintainers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add myself as the maintainer of the int3472 driver. Also update Daniel's e-mail address while at it. Signed-off-by: Sakari Ailus Acked-by: Daniel Scally Link: https://patch.msgid.link/20251028084959.394795-1-sakari.ailus@linux.intel.com Signed-off-by: Ilpo Järvinen --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 832f3279ea83..c2bf47675a03 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12856,7 +12856,8 @@ F: tools/testing/selftests/sgx/* K: \bSGX_ INTEL SKYLAKE INT3472 ACPI DEVICE DRIVER -M: Daniel Scally +M: Daniel Scally +M: Sakari Ailus S: Maintained F: drivers/platform/x86/intel/int3472/ F: include/linux/platform_data/x86/int3472.h From 8f3eaad9812f62e7006ad08602444b32c3101824 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Oct 2025 17:23:30 +0200 Subject: [PATCH 0478/1075] Input: Add keycodes for electronic privacy screen on/off hotkeys MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add keycodes for hotkeys toggling the electronic privacy screen found on some laptops on/off. There already is an API for eprivacy screens as kernel-mode-setting drm connector object properties: https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties this API also supports reporting when the eprivacy screen is turned on/off by the embedded-controller (EC) in response to hotkey presses. But on some laptops (e.g. the Dell Latitude 7300) the firmware does not allow querying the presence nor the status of the eprivacy screen at boot. This makes it impossible to implement the drm connector properties API since drm objects do not allow adding new properties after creation and the presence of the eprivacy cannot be detected at boot. The first notice of the presence of an eprivacy screen on these laptops is an EC generated (WMI) event when the eprivacy screen hotkeys are pressed. In this case the new keycodes this change adds can be generated to notify userspace of the eprivacy screen on/off hotkeys being pressed, so that userspace can show the usual on-screen-display (OSD) notification for eprivacy screen on/off to the user. This is similar to how e.g. touchpad on/off keycodes are used to show the touchpad on/off OSD. Signed-off-by: Hans de Goede Acked-by: Dmitry Torokhov Link: https://patch.msgid.link/20251020152331.52870-2-hansg@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- include/uapi/linux/input-event-codes.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 4a9fbf42aa9f..9cd89bcc1d9c 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -631,6 +631,18 @@ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ +/* + * Keycodes for hotkeys toggling the electronic privacy screen found on some + * laptops on/off. Note when the embedded-controller turns on/off the eprivacy + * screen itself then the state should be reported through drm connecter props: + * https://www.kernel.org/doc/html/latest/gpu/drm-kms.html#standard-connector-properties + * Except when implementing the drm connecter properties API is not possible + * because e.g. the firmware does not allow querying the presence and/or status + * of the eprivacy screen at boot. + */ +#define KEY_EPRIVACY_SCREEN_ON 0x252 +#define KEY_EPRIVACY_SCREEN_OFF 0x253 + #define KEY_KBDINPUTASSIST_PREV 0x260 #define KEY_KBDINPUTASSIST_NEXT 0x261 #define KEY_KBDINPUTASSIST_PREVGROUP 0x262 From 4173edb076b3ae30d734d55fce0ebac63139b656 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 20 Oct 2025 17:23:31 +0200 Subject: [PATCH 0479/1075] platform/x86: dell-wmi-base: Handle electronic privacy screen on/off events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add handling for events for the electronic privacy screen found on some models (e.g. Dell Latitude 7300) being toggled on/off. Emit KEY_EPRIVACY_SCREEN_OFF / KEY_EPRIVACY_SCREEN_ON events for this so that userspace can show the usual on-screen-display (OSD) notification for eprivacy screen on/off to the user. Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20251020152331.52870-3-hansg@kernel.org Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/dell/dell-wmi-base.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/platform/x86/dell/dell-wmi-base.c b/drivers/platform/x86/dell/dell-wmi-base.c index 841a5414d28a..28076929d6af 100644 --- a/drivers/platform/x86/dell/dell-wmi-base.c +++ b/drivers/platform/x86/dell/dell-wmi-base.c @@ -365,6 +365,13 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* Backlight brightness change event */ { KE_IGNORE, 0x0003, { KEY_RESERVED } }, + /* + * Electronic privacy screen toggled, extended data gives state, + * separate entries for on/off see handling in dell_wmi_process_key(). + */ + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_OFF } }, + { KE_KEY, 0x000c, { KEY_EPRIVACY_SCREEN_ON } }, + /* Ultra-performance mode switch request */ { KE_IGNORE, 0x000d, { KEY_RESERVED } }, @@ -435,6 +442,11 @@ static int dell_wmi_process_key(struct wmi_device *wdev, int type, int code, u16 "Dell tablet mode switch", SW_TABLET_MODE, !buffer[0]); return 1; + } else if (type == 0x0012 && code == 0x000c && remaining > 0) { + /* Eprivacy toggle, switch to "on" key entry for on events */ + if (buffer[0] == 2) + key++; + used = 1; } else if (type == 0x0012 && code == 0x000d && remaining > 0) { value = (buffer[2] == 2); used = 1; From 48cbf50531d8eca15b8a811717afdebb8677de9b Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 24 Oct 2025 16:23:44 +0800 Subject: [PATCH 0480/1075] regmap: irq: Correct documentation of wake_invert flag Per commit 9442490a0286 ("regmap: irq: Support wake IRQ mask inversion") the wake_invert flag is to support enable register, so cleared bits are wake disabled. Fixes: 68622bdfefb9 ("regmap: irq: document mask/wake_invert flags") Cc: stable@vger.kernel.org Signed-off-by: Shawn Guo Link: https://patch.msgid.link/20251024082344.2188895-1-shawnguo2@yeah.net Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4e1ac1fbcec4..55343795644b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1643,7 +1643,7 @@ struct regmap_irq_chip_data; * @status_invert: Inverted status register: cleared bits are active interrupts. * @status_is_level: Status register is actuall signal level: Xor status * register with previous value to get active interrupts. - * @wake_invert: Inverted wake register: cleared bits are wake enabled. + * @wake_invert: Inverted wake register: cleared bits are wake disabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge * or low/high level interrupts and they should be combined into From bd34bf518a5ffeb8eb7c8b9907ba97b606166f7b Mon Sep 17 00:00:00 2001 From: Lazar Aleksic Date: Tue, 28 Oct 2025 19:09:05 +0100 Subject: [PATCH 0481/1075] platform: x86: Kconfig: fix minor typo in help for WIRELESS_HOTKEY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed a misspelling of Xiaomi. Signed-off-by: Lazar Aleksic Link: https://patch.msgid.link/20251028180956.10753-1-kripticni.dev@gmail.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 46e62feeda3c..c122016d82f1 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -432,7 +432,7 @@ config WIRELESS_HOTKEY depends on INPUT help This driver provides supports for the wireless buttons found on some AMD, - HP, & Xioami laptops. + HP, & Xiaomi laptops. On such systems the driver should load automatically (via ACPI alias). To compile this driver as a module, choose M here: the module will From 388eff894d6bc5f921e9bfff0e4b0ab2684a96e9 Mon Sep 17 00:00:00 2001 From: "Chang S. Bae" Date: Mon, 9 Jun 2025 17:16:59 -0700 Subject: [PATCH 0482/1075] x86/fpu: Ensure XFD state on signal delivery Sean reported [1] the following splat when running KVM tests: WARNING: CPU: 232 PID: 15391 at xfd_validate_state+0x65/0x70 Call Trace: fpu__clear_user_states+0x9c/0x100 arch_do_signal_or_restart+0x142/0x210 exit_to_user_mode_loop+0x55/0x100 do_syscall_64+0x205/0x2c0 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Chao further identified [2] a reproducible scenario involving signal delivery: a non-AMX task is preempted by an AMX-enabled task which modifies the XFD MSR. When the non-AMX task resumes and reloads XSTATE with init values, a warning is triggered due to a mismatch between fpstate::xfd and the CPU's current XFD state. fpu__clear_user_states() does not currently re-synchronize the XFD state after such preemption. Invoke xfd_update_state() which detects and corrects the mismatch if there is a dynamic feature. This also benefits the sigreturn path, as fpu__restore_sig() may call fpu__clear_user_states() when the sigframe is inaccessible. [ dhansen: minor changelog munging ] Closes: https://lore.kernel.org/lkml/aDCo_SczQOUaB2rS@google.com [1] Fixes: 672365477ae8a ("x86/fpu: Update XFD state where required") Reported-by: Sean Christopherson Signed-off-by: Chang S. Bae Signed-off-by: Dave Hansen Reviewed-by: Chao Gao Tested-by: Chao Gao Link: https://lore.kernel.org/all/aDWbctO%2FRfTGiCg3@intel.com [2] Cc:stable@vger.kernel.org Link: https://patch.msgid.link/20250610001700.4097-1-chang.seok.bae%40intel.com --- arch/x86/kernel/fpu/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1f71cc135e9a..e88eacb1b5bb 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -825,6 +825,9 @@ void fpu__clear_user_states(struct fpu *fpu) !fpregs_state_valid(fpu, smp_processor_id())) os_xrstor_supervisor(fpu->fpstate); + /* Ensure XFD state is in sync before reloading XSTATE */ + xfd_update_state(fpu->fpstate); + /* Reset user states in registers. */ restore_fpregs_from_init_fpstate(XFEATURE_MASK_USER_RESTORE); From 0d6e9ec80cebf9b378a1d3a01144e576d731c397 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Oct 2025 12:40:59 +0100 Subject: [PATCH 0483/1075] x86/build: Disable SSE4a MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Leyvi Rose reported that his X86_NATIVE_CPU=y build is failing because our instruction decoder doesn't support SSE4a and the AMDGPU code seems to be generating those with his compiler of choice (CLANG+LTO). Now, our normal build flags disable SSE MMX SSE2 3DNOW AVX, but then CC_FLAGS_FPU re-enable SSE SSE2. Since nothing mentions SSE3 or SSE4, I'm assuming that -msse (or its negative) control all SSE variants -- but why then explicitly enumerate SSE2 ? Anyway, until the instruction decoder gets fixed, explicitly disallow SSE4a (an AMD specific SSE4 extension). Fixes: ea1dcca1de12 ("x86/kbuild/64: Add the CONFIG_X86_NATIVE_CPU option to locally optimize the kernel with '-march=native'") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov (AMD) Acked-by: Borislav Petkov (AMD) Acked-by: Arisu Tachibana Acked-by: Christian König Acked-by: Harry Wentland Cc: --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 4db7e4bf69f5..8fbff3106c56 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -75,7 +75,7 @@ export BITS # # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 # -KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx +KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -mno-sse4a KBUILD_RUSTFLAGS += --target=$(objtree)/scripts/target.json KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 From e9840461317e1bf0628b164de54632754d5f6a44 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 21 Feb 2025 10:39:49 +0100 Subject: [PATCH 0484/1075] ice: fix lane number calculation E82X adapters do not have sequential IDs, lane number is PF ID. Add check for ICE_MAC_GENERIC and skip checking port options. Also, adjust logical port number for specific E825 device with external PHY support (PCI device id 0x579F). For this particular device, with 2x25G (PHY0) and 2x10G (PHY1) port configuration, modification of pf_id -> lane_number mapping is required. PF IDs on the 2nd PHY start from 4 in such scenario. Otherwise, the lane number cannot be determined correctly, leading to PTP init errors during PF initialization. Fixes: 258f5f9058159 ("ice: Add correct PHY lane assignment") Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Grzegorz Nitka Reviewed-by: Przemek Kitszel Reviewed-by: Milena Olech Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2250426ec91b..28d74bf56ffc 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4382,6 +4382,15 @@ int ice_get_phy_lane_number(struct ice_hw *hw) unsigned int lane; int err; + /* E82X does not have sequential IDs, lane number is PF ID. + * For E825 device, the exception is the variant with external + * PHY (0x579F), in which there is also 1:1 pf_id -> lane_number + * mapping. + */ + if (hw->mac_type == ICE_MAC_GENERIC || + hw->device_id == ICE_DEV_ID_E825C_SGMII) + return hw->pf_id; + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); if (!options) return -ENOMEM; From 45076413063cf5e0e25fd3f7f89fc90338b161c8 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 29 Sep 2025 17:29:05 +0200 Subject: [PATCH 0485/1075] ice: fix destination CGU for dual complex E825 On dual complex E825, only complex 0 has functional CGU (Clock Generation Unit), powering all the PHYs. SBQ (Side Band Queue) destination device 'cgu' in current implementation points to CGU on current complex and, in order to access primary CGU from the secondary complex, the driver should use 'cgu_peer' as a destination device in read/write CGU registers operations. Define new 'cgu_peer' (15) as RDA (Remote Device Access) client over SB-IOSF interface and use it as device target when accessing CGU from secondary complex. This problem has been identified when working on recovery clock enablement [1]. In existing implementation for E825 devices, only PF0, which is clock owner, is involved in CGU configuration, thus the problem was not exposed to the user. [1] https://lore.kernel.org/intel-wired-lan/20250905150947.871566-1-grzegorz.nitka@intel.com/ Fixes: e2193f9f9ec9 ("ice: enable timesync operation on 2xNAC E825 devices") Signed-off-by: Grzegorz Nitka Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 26 ++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 28d74bf56ffc..2532b6f82e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6505,6 +6505,28 @@ u32 ice_get_link_speed(u16 index) return ice_aq_to_link_speed[index]; } +/** + * ice_get_dest_cgu - get destination CGU dev for given HW + * @hw: pointer to the HW struct + * + * Get CGU client id for CGU register read/write operations. + * + * Return: CGU device id to use in SBQ transactions. + */ +static enum ice_sbq_dev_id ice_get_dest_cgu(struct ice_hw *hw) +{ + /* On dual complex E825 only complex 0 has functional CGU powering all + * the PHYs. + * SBQ destination device cgu points to CGU on a current complex and to + * access primary CGU from the secondary complex, the driver should use + * cgu_peer as a destination device. + */ + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) && + !ice_is_primary(hw)) + return ice_sbq_dev_cgu_peer; + return ice_sbq_dev_cgu; +} + /** * ice_read_cgu_reg - Read a CGU register * @hw: Pointer to the HW struct @@ -6519,8 +6541,8 @@ u32 ice_get_link_speed(u16 index) int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_rd, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr }; int err; @@ -6551,8 +6573,8 @@ int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_wr, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr, .data = val }; diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 183dd5457d6a..21bb861febbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -50,6 +50,7 @@ enum ice_sbq_dev_id { ice_sbq_dev_phy_0 = 0x02, ice_sbq_dev_cgu = 0x06, ice_sbq_dev_phy_0_peer = 0x0D, + ice_sbq_dev_cgu_peer = 0x0F, }; enum ice_sbq_msg_opcode { From 9a0f81fc64b2ba80ce768cd6e680c0f440723464 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Wed, 8 Oct 2025 12:28:53 +0200 Subject: [PATCH 0486/1075] ice: fix usage of logical PF id In some devices, the function numbers used are non-contiguous. For example, here is such configuration for E825 device: root@/home/root# lspci -v | grep Eth 0a:00.0 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.1 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.4 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) 0a:00.5 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) When distributing RSS and FDIR masks, which are global resources across the active devices, it is required to have a contiguous PF id, which can be described as a logical PF id. In the case above, function 0 would have a logical PF id of 0, function 1 would have a logical PF id of 1, and functions 4 and 5 would have a logical PF ids 2 and 3 respectively. Using logical PF id can properly describe which slice of resources can be used by a particular PF. The 'function id' to 'logical id' mapping has been introduced with the commit 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration"). However, the usage of 'logical_pf_id' field was unintentionally skipped for profile mask configuration. Fix it by using 'logical_pf_id' instead of 'pf_id' value when configuring masks. Without that patch, wrong indexes, i.e. out of range for given PF, can be used while configuring resources masks, which might lead to memory corruption and undefined driver behavior. The call trace below is one of the examples of such error: [ +0.000008] WARNING: CPU: 39 PID: 3830 at drivers/base/devres.c:1095 devm_kfree+0x70/0xa0 [ +0.000002] RIP: 0010:devm_kfree+0x70/0xa0 [ +0.000001] Call Trace: [ +0.000002] [ +0.000002] ice_free_hw_tbls+0x183/0x710 [ice] [ +0.000106] ice_deinit_hw+0x67/0x90 [ice] [ +0.000091] ice_deinit+0x20d/0x2f0 [ice] [ +0.000076] ice_remove+0x1fa/0x6a0 [ice] [ +0.000075] pci_device_remove+0xa7/0x1d0 [ +0.000010] device_release_driver_internal+0x365/0x530 [ +0.000006] driver_detach+0xbb/0x170 [ +0.000003] bus_remove_driver+0x117/0x290 [ +0.000007] pci_unregister_driver+0x26/0x250 Fixes: 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration") Suggested-by: Dan Nowlin Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 363ae79a3620..013c93b6605e 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1479,7 +1479,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; hw->blk[blk].masks.count = per_pf; - hw->blk[blk].masks.first = hw->pf_id * per_pf; + hw->blk[blk].masks.first = hw->logical_pf_id * per_pf; memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks)); From 85308d999c4b4162a742c9ec5ef954226c3b48d9 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 1 Sep 2025 05:33:11 +0900 Subject: [PATCH 0487/1075] ixgbe: fix memory leak and use-after-free in ixgbe_recovery_probe() The error path of ixgbe_recovery_probe() has two memory bugs. For non-E610 adapters, the function jumps to clean_up_probe without calling devlink_free(), leaking the devlink instance and its embedded adapter structure. For E610 adapters, devlink_free() is called at shutdown_aci, but clean_up_probe then accesses adapter->state, sometimes triggering use-after-free because adapter is embedded in devlink. This UAF is similar to the one recently reported in ixgbe_remove(). (Link) Fix both issues by moving devlink_free() after adapter->state access, aligning with the cleanup order in ixgbe_probe(). Link: https://lore.kernel.org/intel-wired-lan/20250828020558.1450422-1-den@valinux.co.jp/ Fixes: 29cb3b8d95c7 ("ixgbe: add E610 implementation of FW recovery mode") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Reviewed-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ca1ccc630001..3190ce7e44c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -11507,10 +11507,10 @@ static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter) shutdown_aci: mutex_destroy(&adapter->hw.aci.lock); ixgbe_release_hw_control(adapter); - devlink_free(adapter->devlink); clean_up_probe: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); + devlink_free(adapter->devlink); pci_release_mem_regions(pdev); if (disable_dev) pci_disable_device(pdev); From 81fb1fe75c672db905b54f4ab744552121099a24 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sat, 20 Sep 2025 15:39:18 +0900 Subject: [PATCH 0488/1075] igc: power up the PHY before the link test The current implementation of the igc driver doesn't power up the PHY before the link test in igc_ethtool_diag_test(), causing the link test to always report FAIL when admin state is down and the PHY is consequently powered down. To test the link state regardless of admin state, power up the PHY before the link test in the offline test path. After the link test, the original PHY state is restored by igc_reset(), so additional code which explicitly restores the original state is not necessary. Note that this change is applied only for the offline test path. This is because in the online path we shouldn't interrupt normal networking operation and powering up the PHY and restoring the original state would interrupt that. This implementation also uses igc_power_up_phy_copper() without checking the media type, since igc devices are currently only copper devices and the function is called in other places without checking the media type. Furthermore, the powering up is on a best-effort basis, that is, we don't handle failures of powering up (e.g. bus error) and just let the test report FAIL. Tested on Intel Corporation Ethernet Controller I226-V (rev 04) with cable connected and link available. Set device down and do ethtool test. # ip link set dev enp0s5 down Without patch: # ethtool --test enp0s5 The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 1 With patch: # ethtool --test enp0s5 The test result is PASS The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 0 Fixes: f026d8ca2904 ("igc: add support to eeprom, registers and link self-tests") Signed-off-by: Kohei Enju Reviewed-by: Vitaly Lifshits Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index f3e7218ba6f3..ca93629b1d3a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -2094,6 +2094,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev, netdev_info(adapter->netdev, "Offline testing starting"); set_bit(__IGC_TESTING, &adapter->state); + /* power up PHY for link test */ + igc_power_up_phy_copper(&adapter->hw); + /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ From bc73c5885c606f5e48dd4222eba0361fa0f146ca Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:21 +0900 Subject: [PATCH 0489/1075] igb: use EOPNOTSUPP instead of ENOTSUPP in igb_get_sset_count() igb_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f8a208c84f15..10e2445e0ded 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2281,7 +2281,7 @@ static int igb_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGB_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From 21d08d1c4c29f9795fbc678011c85f72931e22c1 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:22 +0900 Subject: [PATCH 0490/1075] igc: use EOPNOTSUPP instead of ENOTSUPP in igc_ethtool_get_sset_count() igc_ethtool_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 36b9fea60961 ("igc: Add support for statistics") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ca93629b1d3a..bb783042d1af 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -810,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGC_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From f82acf6fb42115c87d3809968a2e0ab2fedba15b Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:23 +0900 Subject: [PATCH 0491/1075] ixgbe: use EOPNOTSUPP instead of ENOTSUPP in ixgbe_ptp_feature_enable() When the requested PTP feature is not supported, ixgbe_ptp_feature_enable() returns -ENOTSUPP, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 3a6a4edaa592 ("ixgbe: Hardware Timestamping + PTP Hardware Clock (PHC)") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 114dd88fc71c..6885d2343c48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -641,7 +641,7 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * disabled */ if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) - return -ENOTSUPP; + return -EOPNOTSUPP; if (on) adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; From dc8aa0cb87a7836b59422cc02d969c8df849ee39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 22 Oct 2025 13:07:16 +0300 Subject: [PATCH 0492/1075] drm/i915/dmc: Clear HRR EVT_CTL/HTP to zero on ADL-S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ADL-S the main DMC HRR event DMC_EVT_CTL/HTP are never restored to their previous values during DC6 exit. This angers assert_dmc_loaded(), and basically makes the HRR handler unusable because we don't rewrite EVT_HTP when enabling DMC events. Let's just clear the HRR EVT_CTL/HTP to zero from the beginnning so that the expected value matches the post-DC6 reality. I suppose if we ever had actual use for HRR we'd have to both, reject HRR+PSR, and reprogram EVT_HTP when enabling the event. But for now we don't care about HRR so keeping both registers zeroed is fine. Cc: stable@vger.kernel.org Tested-by: Petr Vorel Fixes: 43175c92d403 ("drm/i915/dmc: Assert DMC is loaded harder") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15153 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20251022100718.24803-2-ville.syrjala@linux.intel.com Reviewed-by: Petr Vorel Reviewed-by: Imre Deak Tested-by: Imre Deak (cherry picked from commit 4df3b340ff6e9f499735d8b52b96a9257fde3918) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dmc.c | 55 +++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c index 77a0199f9ea5..4a4cace1f879 100644 --- a/drivers/gpu/drm/i915/display/intel_dmc.c +++ b/drivers/gpu/drm/i915/display/intel_dmc.c @@ -546,6 +546,36 @@ static bool is_event_handler(struct intel_display *display, REG_FIELD_GET(DMC_EVT_CTL_EVENT_ID_MASK, data) == event_id; } +static bool fixup_dmc_evt(struct intel_display *display, + enum intel_dmc_id dmc_id, + i915_reg_t reg_ctl, u32 *data_ctl, + i915_reg_t reg_htp, u32 *data_htp) +{ + if (!is_dmc_evt_ctl_reg(display, dmc_id, reg_ctl)) + return false; + + if (!is_dmc_evt_htp_reg(display, dmc_id, reg_htp)) + return false; + + /* make sure reg_ctl and reg_htp are for the same event */ + if (i915_mmio_reg_offset(reg_ctl) - i915_mmio_reg_offset(DMC_EVT_CTL(display, dmc_id, 0)) != + i915_mmio_reg_offset(reg_htp) - i915_mmio_reg_offset(DMC_EVT_HTP(display, dmc_id, 0))) + return false; + + /* + * On ADL-S the HRR event handler is not restored after DC6. + * Clear it to zero from the beginning to avoid mismatches later. + */ + if (display->platform.alderlake_s && dmc_id == DMC_FW_MAIN && + is_event_handler(display, dmc_id, MAINDMC_EVENT_VBLANK_A, reg_ctl, *data_ctl)) { + *data_ctl = 0; + *data_htp = 0; + return true; + } + + return false; +} + static bool disable_dmc_evt(struct intel_display *display, enum intel_dmc_id dmc_id, i915_reg_t reg, u32 data) @@ -1064,9 +1094,32 @@ static u32 parse_dmc_fw_header(struct intel_dmc *dmc, for (i = 0; i < mmio_count; i++) { dmc_info->mmioaddr[i] = _MMIO(mmioaddr[i]); dmc_info->mmiodata[i] = mmiodata[i]; + } + for (i = 0; i < mmio_count - 1; i++) { + u32 orig_mmiodata[2] = { + dmc_info->mmiodata[i], + dmc_info->mmiodata[i+1], + }; + + if (!fixup_dmc_evt(display, dmc_id, + dmc_info->mmioaddr[i], &dmc_info->mmiodata[i], + dmc_info->mmioaddr[i+1], &dmc_info->mmiodata[i+1])) + continue; + + drm_dbg_kms(display->drm, + " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_CTL)\n", + i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]), + orig_mmiodata[0], dmc_info->mmiodata[i]); + drm_dbg_kms(display->drm, + " mmio[%d]: 0x%x = 0x%x->0x%x (EVT_HTP)\n", + i+1, i915_mmio_reg_offset(dmc_info->mmioaddr[i+1]), + orig_mmiodata[1], dmc_info->mmiodata[i+1]); + } + + for (i = 0; i < mmio_count; i++) { drm_dbg_kms(display->drm, " mmio[%d]: 0x%x = 0x%x%s%s\n", - i, mmioaddr[i], mmiodata[i], + i, i915_mmio_reg_offset(dmc_info->mmioaddr[i]), dmc_info->mmiodata[i], is_dmc_evt_ctl_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_CTL)" : is_dmc_evt_htp_reg(display, dmc_id, dmc_info->mmioaddr[i]) ? " (EVT_HTP)" : "", disable_dmc_evt(display, dmc_id, dmc_info->mmioaddr[i], From 6012379ede6aa7477db6276bb9876fe7d67c4312 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 28 Oct 2025 09:15:00 -0700 Subject: [PATCH 0493/1075] vfio/type1: sanitize for overflow using check_*_overflow() Adopt check_*_overflow() functions to clearly express overflow check intent. Tested-by: Alejandro Jimenez Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") Reviewed-by: Jason Gunthorpe Reviewed-by: Alejandro Jimenez Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251028-fix-unmap-v6-1-2542b96bcc8e@fb.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 86 ++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 23 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 916cad80941c..91b1480b7a37 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "vfio.h" #define DRIVER_VERSION "0.2" @@ -182,7 +183,7 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, } static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, - dma_addr_t start, u64 size) + dma_addr_t start, size_t size) { struct rb_node *res = NULL; struct rb_node *node = iommu->dma_list.rb_node; @@ -895,14 +896,20 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, unsigned long remote_vaddr; struct vfio_dma *dma; bool do_accounting; + dma_addr_t iova_end; + size_t iova_size; - if (!iommu || !pages) + if (!iommu || !pages || npage <= 0) return -EINVAL; /* Supported for v2 version only */ if (!iommu->v2) return -EACCES; + if (check_mul_overflow(npage, PAGE_SIZE, &iova_size) || + check_add_overflow(user_iova, iova_size - 1, &iova_end)) + return -EOVERFLOW; + mutex_lock(&iommu->lock); if (WARN_ONCE(iommu->vaddr_invalid_count, @@ -1008,12 +1015,21 @@ static void vfio_iommu_type1_unpin_pages(void *iommu_data, { struct vfio_iommu *iommu = iommu_data; bool do_accounting; + dma_addr_t iova_end; + size_t iova_size; int i; /* Supported for v2 version only */ if (WARN_ON(!iommu->v2)) return; + if (WARN_ON(npage <= 0)) + return; + + if (WARN_ON(check_mul_overflow(npage, PAGE_SIZE, &iova_size) || + check_add_overflow(user_iova, iova_size - 1, &iova_end))) + return; + mutex_lock(&iommu->lock); do_accounting = list_empty(&iommu->domain_list); @@ -1374,7 +1390,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, int ret = -EINVAL, retries = 0; unsigned long pgshift; dma_addr_t iova = unmap->iova; - u64 size = unmap->size; + dma_addr_t iova_end; + size_t size = unmap->size; bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR; struct rb_node *n, *first_n; @@ -1387,6 +1404,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, goto unlock; } + if (iova != unmap->iova || size != unmap->size) { + ret = -EOVERFLOW; + goto unlock; + } + pgshift = __ffs(iommu->pgsize_bitmap); pgsize = (size_t)1 << pgshift; @@ -1396,10 +1418,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap_all) { if (iova || size) goto unlock; - size = U64_MAX; - } else if (!size || size & (pgsize - 1) || - iova + size - 1 < iova || size > SIZE_MAX) { - goto unlock; + size = SIZE_MAX; + } else { + if (!size || size & (pgsize - 1)) + goto unlock; + + if (check_add_overflow(iova, size - 1, &iova_end)) { + ret = -EOVERFLOW; + goto unlock; + } } /* When dirty tracking is enabled, allow only min supported pgsize */ @@ -1446,7 +1473,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (dma && dma->iova != iova) goto unlock; - dma = vfio_find_dma(iommu, iova + size - 1, 0); + dma = vfio_find_dma(iommu, iova_end, 0); if (dma && dma->iova + dma->size != iova + size) goto unlock; } @@ -1648,7 +1675,9 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, { bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR; dma_addr_t iova = map->iova; + dma_addr_t iova_end; unsigned long vaddr = map->vaddr; + unsigned long vaddr_end; size_t size = map->size; int ret = 0, prot = 0; size_t pgsize; @@ -1656,8 +1685,15 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, /* Verify that none of our __u64 fields overflow */ if (map->size != size || map->vaddr != vaddr || map->iova != iova) + return -EOVERFLOW; + + if (!size) return -EINVAL; + if (check_add_overflow(iova, size - 1, &iova_end) || + check_add_overflow(vaddr, size - 1, &vaddr_end)) + return -EOVERFLOW; + /* READ/WRITE from device perspective */ if (map->flags & VFIO_DMA_MAP_FLAG_WRITE) prot |= IOMMU_WRITE; @@ -1673,13 +1709,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, WARN_ON((pgsize - 1) & PAGE_MASK); - if (!size || (size | iova | vaddr) & (pgsize - 1)) { - ret = -EINVAL; - goto out_unlock; - } - - /* Don't allow IOVA or virtual address wrap */ - if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) { + if ((size | iova | vaddr) & (pgsize - 1)) { ret = -EINVAL; goto out_unlock; } @@ -1710,7 +1740,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } - if (!vfio_iommu_iova_dma_valid(iommu, iova, iova + size - 1)) { + if (!vfio_iommu_iova_dma_valid(iommu, iova, iova_end)) { ret = -EINVAL; goto out_unlock; } @@ -2977,7 +3007,8 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, struct vfio_iommu_type1_dirty_bitmap_get range; unsigned long pgshift; size_t data_size = dirty.argsz - minsz; - size_t iommu_pgsize; + size_t size, iommu_pgsize; + dma_addr_t iova, iova_end; if (!data_size || data_size < sizeof(range)) return -EINVAL; @@ -2986,14 +3017,24 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, sizeof(range))) return -EFAULT; - if (range.iova + range.size < range.iova) + iova = range.iova; + size = range.size; + + if (iova != range.iova || size != range.size) + return -EOVERFLOW; + + if (!size) return -EINVAL; + + if (check_add_overflow(iova, size - 1, &iova_end)) + return -EOVERFLOW; + if (!access_ok((void __user *)range.bitmap.data, range.bitmap.size)) return -EINVAL; pgshift = __ffs(range.bitmap.pgsize); - ret = verify_bitmap_size(range.size >> pgshift, + ret = verify_bitmap_size(size >> pgshift, range.bitmap.size); if (ret) return ret; @@ -3007,19 +3048,18 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, ret = -EINVAL; goto out_unlock; } - if (range.iova & (iommu_pgsize - 1)) { + if (iova & (iommu_pgsize - 1)) { ret = -EINVAL; goto out_unlock; } - if (!range.size || range.size & (iommu_pgsize - 1)) { + if (size & (iommu_pgsize - 1)) { ret = -EINVAL; goto out_unlock; } if (iommu->dirty_page_tracking) ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, range.iova, - range.size, + iommu, iova, size, range.bitmap.pgsize); else ret = -EINVAL; From 1196f1f897d4ee64d8844e8cfa97c8f93e4d158c Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 28 Oct 2025 09:15:01 -0700 Subject: [PATCH 0494/1075] vfio/type1: move iova increment to unmap_unpin_*() caller Move incrementing iova to the caller of these functions as part of preparing to handle end of address space map/unmap. Tested-by: Alejandro Jimenez Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") Reviewed-by: Jason Gunthorpe Reviewed-by: Alejandro Jimenez Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251028-fix-unmap-v6-2-2542b96bcc8e@fb.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 91b1480b7a37..48bcc0633d44 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1083,7 +1083,7 @@ static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, #define VFIO_IOMMU_TLB_SYNC_MAX 512 static size_t unmap_unpin_fast(struct vfio_domain *domain, - struct vfio_dma *dma, dma_addr_t *iova, + struct vfio_dma *dma, dma_addr_t iova, size_t len, phys_addr_t phys, long *unlocked, struct list_head *unmapped_list, int *unmapped_cnt, @@ -1093,18 +1093,17 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { - unmapped = iommu_unmap_fast(domain->domain, *iova, len, + unmapped = iommu_unmap_fast(domain->domain, iova, len, iotlb_gather); if (!unmapped) { kfree(entry); } else { - entry->iova = *iova; + entry->iova = iova; entry->phys = phys; entry->len = unmapped; list_add_tail(&entry->list, unmapped_list); - *iova += unmapped; (*unmapped_cnt)++; } } @@ -1123,18 +1122,17 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, } static size_t unmap_unpin_slow(struct vfio_domain *domain, - struct vfio_dma *dma, dma_addr_t *iova, + struct vfio_dma *dma, dma_addr_t iova, size_t len, phys_addr_t phys, long *unlocked) { - size_t unmapped = iommu_unmap(domain->domain, *iova, len); + size_t unmapped = iommu_unmap(domain->domain, iova, len); if (unmapped) { - *unlocked += vfio_unpin_pages_remote(dma, *iova, + *unlocked += vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, unmapped >> PAGE_SHIFT, false); - *iova += unmapped; cond_resched(); } return unmapped; @@ -1197,16 +1195,18 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, * First, try to use fast unmap/unpin. In case of failure, * switch to slow unmap/unpin path. */ - unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys, + unmapped = unmap_unpin_fast(domain, dma, iova, len, phys, &unlocked, &unmapped_region_list, &unmapped_region_cnt, &iotlb_gather); if (!unmapped) { - unmapped = unmap_unpin_slow(domain, dma, &iova, len, + unmapped = unmap_unpin_slow(domain, dma, iova, len, phys, &unlocked); if (WARN_ON(!unmapped)) break; } + + iova += unmapped; } dma->iommu_mapped = false; From ef270ec44637d464126bd4ade483c4a1887e06bc Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 28 Oct 2025 09:15:02 -0700 Subject: [PATCH 0495/1075] vfio/type1: handle DMA map/unmap up to the addressable limit Before this commit, it was possible to create end of address space mappings, but unmapping them via VFIO_IOMMU_UNMAP_DMA, replaying them for newly added iommu domains, and querying their dirty pages via VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP was broken due to bugs caused by comparisons against (iova + size) expressions, which overflow to zero. Additionally, there appears to be a page pinning leak in the vfio_iommu_type1_release() path, since vfio_unmap_unpin()'s loop body where unmap_unpin_*() are called will never be entered due to overflow of (iova + size) to zero. This commit handles DMA map/unmap operations up to the addressable limit by comparing against inclusive end-of-range limits, and changing iteration to perform relative traversals across range sizes, rather than absolute traversals across addresses. vfio_link_dma() inserts a zero-sized vfio_dma into the rb-tree, and is only used for that purpose, so discard the size from consideration for the insertion point. Tested-by: Alejandro Jimenez Fixes: 73fa0d10d077 ("vfio: Type1 IOMMU implementation") Reviewed-by: Jason Gunthorpe Reviewed-by: Alejandro Jimenez Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251028-fix-unmap-v6-3-2542b96bcc8e@fb.com Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 77 ++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 35 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 48bcc0633d44..5167bec14e36 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -168,12 +168,14 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, { struct rb_node *node = iommu->dma_list.rb_node; + WARN_ON(!size); + while (node) { struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); - if (start + size <= dma->iova) + if (start + size - 1 < dma->iova) node = node->rb_left; - else if (start >= dma->iova + dma->size) + else if (start > dma->iova + dma->size - 1) node = node->rb_right; else return dma; @@ -183,16 +185,19 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, } static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, - dma_addr_t start, size_t size) + dma_addr_t start, + dma_addr_t end) { struct rb_node *res = NULL; struct rb_node *node = iommu->dma_list.rb_node; struct vfio_dma *dma_res = NULL; + WARN_ON(end < start); + while (node) { struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); - if (start < dma->iova + dma->size) { + if (start <= dma->iova + dma->size - 1) { res = node; dma_res = dma; if (start >= dma->iova) @@ -202,7 +207,7 @@ static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, node = node->rb_right; } } - if (res && size && dma_res->iova >= start + size) + if (res && dma_res->iova > end) res = NULL; return res; } @@ -212,11 +217,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new) struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; struct vfio_dma *dma; + WARN_ON(new->size != 0); + while (*link) { parent = *link; dma = rb_entry(parent, struct vfio_dma, node); - if (new->iova + new->size <= dma->iova) + if (new->iova <= dma->iova) link = &(*link)->rb_left; else link = &(*link)->rb_right; @@ -1141,12 +1148,12 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain, static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, bool do_accounting) { - dma_addr_t iova = dma->iova, end = dma->iova + dma->size; struct vfio_domain *domain, *d; LIST_HEAD(unmapped_region_list); struct iommu_iotlb_gather iotlb_gather; int unmapped_region_cnt = 0; long unlocked = 0; + size_t pos = 0; if (!dma->size) return 0; @@ -1170,13 +1177,14 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, } iommu_iotlb_gather_init(&iotlb_gather); - while (iova < end) { + while (pos < dma->size) { size_t unmapped, len; phys_addr_t phys, next; + dma_addr_t iova = dma->iova + pos; phys = iommu_iova_to_phys(domain->domain, iova); if (WARN_ON(!phys)) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } @@ -1185,7 +1193,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, * may require hardware cache flushing, try to find the * largest contiguous physical memory chunk to unmap. */ - for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) { + for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) { next = iommu_iova_to_phys(domain->domain, iova + len); if (next != phys + len) break; @@ -1206,7 +1214,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, break; } - iova += unmapped; + pos += unmapped; } dma->iommu_mapped = false; @@ -1298,7 +1306,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, } static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, - dma_addr_t iova, size_t size, size_t pgsize) + dma_addr_t iova, dma_addr_t iova_end, size_t pgsize) { struct vfio_dma *dma; struct rb_node *n; @@ -1315,8 +1323,8 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma && dma->iova != iova) return -EINVAL; - dma = vfio_find_dma(iommu, iova + size - 1, 0); - if (dma && dma->iova + dma->size != iova + size) + dma = vfio_find_dma(iommu, iova_end, 1); + if (dma && dma->iova + dma->size - 1 != iova_end) return -EINVAL; for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { @@ -1325,7 +1333,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, if (dma->iova < iova) continue; - if (dma->iova > iova + size - 1) + if (dma->iova > iova_end) break; ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize); @@ -1418,7 +1426,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (unmap_all) { if (iova || size) goto unlock; - size = SIZE_MAX; + iova_end = ~(dma_addr_t)0; } else { if (!size || size & (pgsize - 1)) goto unlock; @@ -1473,17 +1481,17 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (dma && dma->iova != iova) goto unlock; - dma = vfio_find_dma(iommu, iova_end, 0); - if (dma && dma->iova + dma->size != iova + size) + dma = vfio_find_dma(iommu, iova_end, 1); + if (dma && dma->iova + dma->size - 1 != iova_end) goto unlock; } ret = 0; - n = first_n = vfio_find_dma_first_node(iommu, iova, size); + n = first_n = vfio_find_dma_first_node(iommu, iova, iova_end); while (n) { dma = rb_entry(n, struct vfio_dma, node); - if (dma->iova >= iova + size) + if (dma->iova > iova_end) break; if (!iommu->v2 && iova > dma->iova) @@ -1813,12 +1821,12 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, for (; n; n = rb_next(n)) { struct vfio_dma *dma; - dma_addr_t iova; + size_t pos = 0; dma = rb_entry(n, struct vfio_dma, node); - iova = dma->iova; - while (iova < dma->iova + dma->size) { + while (pos < dma->size) { + dma_addr_t iova = dma->iova + pos; phys_addr_t phys; size_t size; @@ -1834,14 +1842,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, phys = iommu_iova_to_phys(d->domain, iova); if (WARN_ON(!phys)) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } size = PAGE_SIZE; p = phys + size; i = iova + size; - while (i < dma->iova + dma->size && + while (pos + size < dma->size && p == iommu_iova_to_phys(d->domain, i)) { size += PAGE_SIZE; p += PAGE_SIZE; @@ -1849,9 +1857,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, } } else { unsigned long pfn; - unsigned long vaddr = dma->vaddr + - (iova - dma->iova); - size_t n = dma->iova + dma->size - iova; + unsigned long vaddr = dma->vaddr + pos; + size_t n = dma->size - pos; long npage; npage = vfio_pin_pages_remote(dma, vaddr, @@ -1882,7 +1889,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, goto unwind; } - iova += size; + pos += size; } } @@ -1899,29 +1906,29 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, unwind: for (; n; n = rb_prev(n)) { struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); - dma_addr_t iova; + size_t pos = 0; if (dma->iommu_mapped) { iommu_unmap(domain->domain, dma->iova, dma->size); continue; } - iova = dma->iova; - while (iova < dma->iova + dma->size) { + while (pos < dma->size) { + dma_addr_t iova = dma->iova + pos; phys_addr_t phys, p; size_t size; dma_addr_t i; phys = iommu_iova_to_phys(domain->domain, iova); if (!phys) { - iova += PAGE_SIZE; + pos += PAGE_SIZE; continue; } size = PAGE_SIZE; p = phys + size; i = iova + size; - while (i < dma->iova + dma->size && + while (pos + size < dma->size && p == iommu_iova_to_phys(domain->domain, i)) { size += PAGE_SIZE; p += PAGE_SIZE; @@ -3059,7 +3066,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu, if (iommu->dirty_page_tracking) ret = vfio_iova_dirty_bitmap(range.bitmap.data, - iommu, iova, size, + iommu, iova, iova_end, range.bitmap.pgsize); else ret = -EINVAL; From 16950b60c19b9137eb8bfeb298621e803e98dcc7 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 28 Oct 2025 09:15:03 -0700 Subject: [PATCH 0496/1075] vfio: selftests: update DMA map/unmap helpers to support more test kinds Add __vfio_pci_dma_*() helpers which return -errno from the underlying ioctls. Add __vfio_pci_dma_unmap_all() to test more unmapping code paths. Add an out unmapped arg to report the unmapped byte size. The existing vfio_pci_dma_*() functions, which are intended for happy-path usage (assert on failure) are now thin wrappers on top of the double-underscore helpers. Reviewed-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251028-fix-unmap-v6-4-2542b96bcc8e@fb.com Signed-off-by: Alex Williamson --- .../selftests/vfio/lib/include/vfio_util.h | 27 ++++- .../selftests/vfio/lib/vfio_pci_device.c | 108 ++++++++++++++---- .../selftests/vfio/vfio_dma_mapping_test.c | 5 +- 3 files changed, 110 insertions(+), 30 deletions(-) diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index ed31606e01b7..240409bf5f8a 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -206,10 +206,29 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_ void vfio_pci_device_cleanup(struct vfio_pci_device *device); void vfio_pci_device_reset(struct vfio_pci_device *device); -void vfio_pci_dma_map(struct vfio_pci_device *device, - struct vfio_dma_region *region); -void vfio_pci_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region); +int __vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region); +int __vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region, + u64 *unmapped); +int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped); + +static inline void vfio_pci_dma_map(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_map(device, region), 0); +} + +static inline void vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_unmap(device, region, NULL), 0); +} + +static inline void vfio_pci_dma_unmap_all(struct vfio_pci_device *device) +{ + VFIO_ASSERT_EQ(__vfio_pci_dma_unmap_all(device, NULL), 0); +} void vfio_pci_config_access(struct vfio_pci_device *device, bool write, size_t config, size_t size, void *data); diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index 0921b2451ba5..a381fd253aa7 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -141,7 +142,7 @@ static void vfio_pci_irq_get(struct vfio_pci_device *device, u32 index, ioctl_assert(device->fd, VFIO_DEVICE_GET_IRQ_INFO, irq_info); } -static void vfio_iommu_dma_map(struct vfio_pci_device *device, +static int vfio_iommu_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { struct vfio_iommu_type1_dma_map args = { @@ -152,10 +153,13 @@ static void vfio_iommu_dma_map(struct vfio_pci_device *device, .size = region->size, }; - ioctl_assert(device->container_fd, VFIO_IOMMU_MAP_DMA, &args); + if (ioctl(device->container_fd, VFIO_IOMMU_MAP_DMA, &args)) + return -errno; + + return 0; } -static void iommufd_dma_map(struct vfio_pci_device *device, +static int iommufd_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { struct iommu_ioas_map args = { @@ -169,54 +173,108 @@ static void iommufd_dma_map(struct vfio_pci_device *device, .ioas_id = device->ioas_id, }; - ioctl_assert(device->iommufd, IOMMU_IOAS_MAP, &args); + if (ioctl(device->iommufd, IOMMU_IOAS_MAP, &args)) + return -errno; + + return 0; } -void vfio_pci_dma_map(struct vfio_pci_device *device, +int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region) { + int ret; + if (device->iommufd) - iommufd_dma_map(device, region); + ret = iommufd_dma_map(device, region); else - vfio_iommu_dma_map(device, region); + ret = vfio_iommu_dma_map(device, region); + + if (ret) + return ret; list_add(®ion->link, &device->dma_regions); + + return 0; } -static void vfio_iommu_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +static int vfio_iommu_dma_unmap(int fd, u64 iova, u64 size, u32 flags, + u64 *unmapped) { struct vfio_iommu_type1_dma_unmap args = { .argsz = sizeof(args), - .iova = region->iova, - .size = region->size, + .iova = iova, + .size = size, + .flags = flags, }; - ioctl_assert(device->container_fd, VFIO_IOMMU_UNMAP_DMA, &args); + if (ioctl(fd, VFIO_IOMMU_UNMAP_DMA, &args)) + return -errno; + + if (unmapped) + *unmapped = args.size; + + return 0; } -static void iommufd_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +static int iommufd_dma_unmap(int fd, u64 iova, u64 length, u32 ioas_id, + u64 *unmapped) { struct iommu_ioas_unmap args = { .size = sizeof(args), - .iova = region->iova, - .length = region->size, - .ioas_id = device->ioas_id, + .iova = iova, + .length = length, + .ioas_id = ioas_id, }; - ioctl_assert(device->iommufd, IOMMU_IOAS_UNMAP, &args); + if (ioctl(fd, IOMMU_IOAS_UNMAP, &args)) + return -errno; + + if (unmapped) + *unmapped = args.length; + + return 0; } -void vfio_pci_dma_unmap(struct vfio_pci_device *device, - struct vfio_dma_region *region) +int __vfio_pci_dma_unmap(struct vfio_pci_device *device, + struct vfio_dma_region *region, u64 *unmapped) { - if (device->iommufd) - iommufd_dma_unmap(device, region); - else - vfio_iommu_dma_unmap(device, region); + int ret; - list_del(®ion->link); + if (device->iommufd) + ret = iommufd_dma_unmap(device->iommufd, region->iova, + region->size, device->ioas_id, + unmapped); + else + ret = vfio_iommu_dma_unmap(device->container_fd, region->iova, + region->size, 0, unmapped); + + if (ret) + return ret; + + list_del_init(®ion->link); + + return 0; +} + +int __vfio_pci_dma_unmap_all(struct vfio_pci_device *device, u64 *unmapped) +{ + int ret; + struct vfio_dma_region *curr, *next; + + if (device->iommufd) + ret = iommufd_dma_unmap(device->iommufd, 0, UINT64_MAX, + device->ioas_id, unmapped); + else + ret = vfio_iommu_dma_unmap(device->container_fd, 0, 0, + VFIO_DMA_UNMAP_FLAG_ALL, unmapped); + + if (ret) + return ret; + + list_for_each_entry_safe(curr, next, &device->dma_regions, link) + list_del_init(&curr->link); + + return 0; } static void vfio_pci_region_get(struct vfio_pci_device *device, int index, diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index ab19c54a774d..a38966e8e5a6 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -129,6 +129,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) struct vfio_dma_region region; struct iommu_mapping mapping; u64 mapping_size = size; + u64 unmapped; int rc; region.vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); @@ -184,7 +185,9 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) } unmap: - vfio_pci_dma_unmap(self->device, ®ion); + rc = __vfio_pci_dma_unmap(self->device, ®ion, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region.size); printf("Unmapped IOVA 0x%lx\n", region.iova); ASSERT_EQ(INVALID_IOVA, __to_iova(self->device, region.vaddr)); ASSERT_NE(0, iommu_mapping_get(device_bdf, region.iova, &mapping)); From de8d1f2fd5a510bf2c1c25b84e1a718a0f0af105 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 28 Oct 2025 09:15:04 -0700 Subject: [PATCH 0497/1075] vfio: selftests: add end of address space DMA map/unmap tests Add tests which validate dma map/unmap at the end of address space. Add negative test cases for checking that overflowing ioctl args fail with the expected errno. Reviewed-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251028-fix-unmap-v6-5-2542b96bcc8e@fb.com Signed-off-by: Alex Williamson --- .../selftests/vfio/vfio_dma_mapping_test.c | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index a38966e8e5a6..4f1ea79a200c 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -112,6 +112,8 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous, 0, 0); FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_2mb, SZ_2M, MAP_HUGETLB | MAP_HUGE_2MB); FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | MAP_HUGE_1GB); +#undef FIXTURE_VARIANT_ADD_IOMMU_MODE + FIXTURE_SETUP(vfio_dma_mapping_test) { self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); @@ -195,6 +197,94 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) ASSERT_TRUE(!munmap(region.vaddr, size)); } +FIXTURE(vfio_dma_map_limit_test) { + struct vfio_pci_device *device; + struct vfio_dma_region region; + size_t mmap_size; +}; + +FIXTURE_VARIANT(vfio_dma_map_limit_test) { + const char *iommu_mode; +}; + +#define FIXTURE_VARIANT_ADD_IOMMU_MODE(_iommu_mode) \ +FIXTURE_VARIANT_ADD(vfio_dma_map_limit_test, _iommu_mode) { \ + .iommu_mode = #_iommu_mode, \ +} + +FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); + +#undef FIXTURE_VARIANT_ADD_IOMMU_MODE + +FIXTURE_SETUP(vfio_dma_map_limit_test) +{ + struct vfio_dma_region *region = &self->region; + u64 region_size = getpagesize(); + + /* + * Over-allocate mmap by double the size to provide enough backing vaddr + * for overflow tests + */ + self->mmap_size = 2 * region_size; + + self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + region->vaddr = mmap(NULL, self->mmap_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(region->vaddr, MAP_FAILED); + + /* One page prior to the end of address space */ + region->iova = ~(iova_t)0 & ~(region_size - 1); + region->size = region_size; +} + +FIXTURE_TEARDOWN(vfio_dma_map_limit_test) +{ + vfio_pci_device_cleanup(self->device); + ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0); +} + +TEST_F(vfio_dma_map_limit_test, unmap_range) +{ + struct vfio_dma_region *region = &self->region; + u64 unmapped; + int rc; + + vfio_pci_dma_map(self->device, region); + ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr)); + + rc = __vfio_pci_dma_unmap(self->device, region, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region->size); +} + +TEST_F(vfio_dma_map_limit_test, unmap_all) +{ + struct vfio_dma_region *region = &self->region; + u64 unmapped; + int rc; + + vfio_pci_dma_map(self->device, region); + ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr)); + + rc = __vfio_pci_dma_unmap_all(self->device, &unmapped); + ASSERT_EQ(rc, 0); + ASSERT_EQ(unmapped, region->size); +} + +TEST_F(vfio_dma_map_limit_test, overflow) +{ + struct vfio_dma_region *region = &self->region; + int rc; + + region->size = self->mmap_size; + + rc = __vfio_pci_dma_map(self->device, region); + ASSERT_EQ(rc, -EOVERFLOW); + + rc = __vfio_pci_dma_unmap(self->device, region, NULL); + ASSERT_EQ(rc, -EOVERFLOW); +} + int main(int argc, char *argv[]) { device_bdf = vfio_selftests_get_bdf(&argc, argv); From 2cbb259ec4f8e12dade80b388b81d41fa22187d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 23 Oct 2025 14:55:32 +0200 Subject: [PATCH 0498/1075] bpf: Reject negative head_room in __bpf_skb_change_head Yinhao et al. recently reported: Our fuzzing tool was able to create a BPF program which triggered the below BUG condition inside pskb_expand_head. [ 23.016047][T10006] kernel BUG at net/core/skbuff.c:2232! [...] [ 23.017301][T10006] RIP: 0010:pskb_expand_head+0x1519/0x1530 [...] [ 23.021249][T10006] Call Trace: [ 23.021387][T10006] [ 23.021507][T10006] ? __pfx_pskb_expand_head+0x10/0x10 [ 23.021725][T10006] __bpf_skb_change_head+0x22a/0x520 [ 23.021939][T10006] bpf_skb_change_head+0x34/0x1b0 [ 23.022143][T10006] ___bpf_prog_run+0xf70/0xb670 [ 23.022342][T10006] __bpf_prog_run32+0xed/0x140 [...] The problem is that in __bpf_skb_change_head() we need to reject a negative head_room as otherwise this propagates all the way to the pskb_expand_head() from skb_cow(). For example, if the BPF test infra passes a skb with gso_skb:1 to the BPF helper with a negative head_room of -22, then this gets passed into skb_cow(). __skb_cow() in this example calculates a delta of -86 which gets aligned to -64, and then triggers BUG_ON(nhead < 0). Thus, reject malformed negative input. Fixes: 3a0af8fd61f9 ("bpf: BPF for lightweight tunnel infrastructure") Reported-by: Yinhao Hu Reported-by: Kaiyan Mei Signed-off-by: Daniel Borkmann Signed-off-by: Martin KaFai Lau Reviewed-by: Dongliang Mu Link: https://patch.msgid.link/20251023125532.182262-1-daniel@iogearbox.net --- net/core/filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index 76628df1fc82..fa06c5a08e22 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3877,7 +3877,8 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room, u32 new_len = skb->len + head_room; int ret; - if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) || + if (unlikely(flags || (int)head_room < 0 || + (!skb_is_gso(skb) && new_len > max_len) || new_len < skb->len)) return -EINVAL; From 18cd0a9c7aaf880502e4aff3ea30022f97d6c103 Mon Sep 17 00:00:00 2001 From: PIYUSH CHOUDHARY Date: Mon, 20 Oct 2025 00:05:08 +0530 Subject: [PATCH 0499/1075] video: fb: Fix typo in comment in fb.h Fix typo: "verical" -> "vertical" in macro description Signed-off-by: PIYUSH CHOUDHARY Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- include/uapi/linux/fb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/fb.h b/include/uapi/linux/fb.h index cde8f173f566..22acaaec7b1c 100644 --- a/include/uapi/linux/fb.h +++ b/include/uapi/linux/fb.h @@ -319,7 +319,7 @@ enum { #define FB_VBLANK_HAVE_VCOUNT 0x020 /* the vcount field is valid */ #define FB_VBLANK_HAVE_HCOUNT 0x040 /* the hcount field is valid */ #define FB_VBLANK_VSYNCING 0x080 /* currently in a vsync */ -#define FB_VBLANK_HAVE_VSYNC 0x100 /* verical syncs can be detected */ +#define FB_VBLANK_HAVE_VSYNC 0x100 /* vertical syncs can be detected */ struct fb_vblank { __u32 flags; /* FB_VBLANK flags */ From eb53368f8d6e2dfba84c8a94d245719bcf9ae270 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 16:43:37 +0800 Subject: [PATCH 0500/1075] fbdev: valkyriefb: Fix reference count leak in valkyriefb_init The of_find_node_by_name() function returns a device tree node with its reference count incremented. The caller is responsible for calling of_node_put() to release this reference when done. Found via static analysis. Fixes: cc5d0189b9ba ("[PATCH] powerpc: Remove device_node addrs/n_addr") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Signed-off-by: Helge Deller --- drivers/video/fbdev/valkyriefb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/video/fbdev/valkyriefb.c b/drivers/video/fbdev/valkyriefb.c index 91d070ef6989..6ff059ee1694 100644 --- a/drivers/video/fbdev/valkyriefb.c +++ b/drivers/video/fbdev/valkyriefb.c @@ -329,11 +329,13 @@ static int __init valkyriefb_init(void) if (of_address_to_resource(dp, 0, &r)) { printk(KERN_ERR "can't find address for valkyrie\n"); + of_node_put(dp); return 0; } frame_buffer_phys = r.start; cmap_regs_phys = r.start + 0x304000; + of_node_put(dp); } #endif /* ppc (!CONFIG_MAC) */ From 5f566c0ac51cd2474e47da68dbe719d3acf7d999 Mon Sep 17 00:00:00 2001 From: Florian Fuchs Date: Sun, 26 Oct 2025 00:38:50 +0200 Subject: [PATCH 0501/1075] fbdev: pvr2fb: Fix leftover reference to ONCHIP_NR_DMA_CHANNELS Commit e24cca19babe ("sh: Kill off MAX_DMA_ADDRESS leftovers.") removed the define ONCHIP_NR_DMA_CHANNELS. So that the leftover reference needs to be replaced by CONFIG_NR_ONCHIP_DMA_CHANNELS to compile successfully with CONFIG_PVR2_DMA enabled. Signed-off-by: Florian Fuchs Reviewed-by: John Paul Adrian Glaubitz Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- drivers/video/fbdev/pvr2fb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index cbdb1caf61bd..0b8d23c12b77 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -192,7 +192,7 @@ static unsigned long pvr2fb_map; #ifdef CONFIG_PVR2_DMA static unsigned int shdma = PVR2_CASCADE_CHAN; -static unsigned int pvr2dma = ONCHIP_NR_DMA_CHANNELS; +static unsigned int pvr2dma = CONFIG_NR_ONCHIP_DMA_CHANNELS; #endif static struct fb_videomode pvr2_modedb[] = { From 18c4ef4e765a798b47980555ed665d78b71aeadf Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Mon, 20 Oct 2025 21:47:01 +0800 Subject: [PATCH 0502/1075] fbdev: bitblit: bound-check glyph index in bit_putcs* bit_putcs_aligned()/unaligned() derived the glyph pointer from the character value masked by 0xff/0x1ff, which may exceed the actual font's glyph count and read past the end of the built-in font array. Clamp the index to the actual glyph count before computing the address. This fixes a global out-of-bounds read reported by syzbot. Reported-by: syzbot+793cf822d213be1a74f2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=793cf822d213be1a74f2 Tested-by: syzbot+793cf822d213be1a74f2@syzkaller.appspotmail.com Signed-off-by: Junjie Cao Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- drivers/video/fbdev/core/bitblit.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index a9ec7f488522..dc5ad3fcc7be 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -79,12 +79,16 @@ static inline void bit_putcs_aligned(struct vc_data *vc, struct fb_info *info, struct fb_image *image, u8 *buf, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; + unsigned int charcnt = vc->vc_font.charcount; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { - src = vc->vc_font.data + (scr_readw(s++)& - charmask)*cellsize; + u16 ch = scr_readw(s++) & charmask; + + if (ch >= charcnt) + ch = 0; + src = vc->vc_font.data + (unsigned int)ch * cellsize; if (attr) { update_attr(buf, src, attr, vc); @@ -112,14 +116,18 @@ static inline void bit_putcs_unaligned(struct vc_data *vc, u8 *dst) { u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff; + unsigned int charcnt = vc->vc_font.charcount; u32 shift_low = 0, mod = vc->vc_font.width % 8; u32 shift_high = 8; u32 idx = vc->vc_font.width >> 3; u8 *src; while (cnt--) { - src = vc->vc_font.data + (scr_readw(s++)& - charmask)*cellsize; + u16 ch = scr_readw(s++) & charmask; + + if (ch >= charcnt) + ch = 0; + src = vc->vc_font.data + (unsigned int)ch * cellsize; if (attr) { update_attr(buf, src, attr, vc); From a1f3058930745d2b938b6b4f5bd9630dc74b26b7 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Fri, 10 Oct 2025 16:16:59 +0800 Subject: [PATCH 0503/1075] fbcon: Set fb_display[i]->mode to NULL when the mode is released Recently, we discovered the following issue through syzkaller: BUG: KASAN: slab-use-after-free in fb_mode_is_equal+0x285/0x2f0 Read of size 4 at addr ff11000001b3c69c by task syz.xxx ... Call Trace: dump_stack_lvl+0xab/0xe0 print_address_description.constprop.0+0x2c/0x390 print_report+0xb9/0x280 kasan_report+0xb8/0xf0 fb_mode_is_equal+0x285/0x2f0 fbcon_mode_deleted+0x129/0x180 fb_set_var+0xe7f/0x11d0 do_fb_ioctl+0x6a0/0x750 fb_ioctl+0xe0/0x140 __x64_sys_ioctl+0x193/0x210 do_syscall_64+0x5f/0x9c0 entry_SYSCALL_64_after_hwframe+0x76/0x7e Based on experimentation and analysis, during framebuffer unregistration, only the memory of fb_info->modelist is freed, without setting the corresponding fb_display[i]->mode to NULL for the freed modes. This leads to UAF issues during subsequent accesses. Here's an example of reproduction steps: 1. With /dev/fb0 already registered in the system, load a kernel module to register a new device /dev/fb1; 2. Set fb1's mode to the global fb_display[] array (via FBIOPUT_CON2FBMAP); 3. Switch console from fb to VGA (to allow normal rmmod of the ko); 4. Unload the kernel module, at this point fb1's modelist is freed, leaving a wild pointer in fb_display[]; 5. Trigger the bug via system calls through fb0 attempting to delete a mode from fb0. Add a check in do_unregister_framebuffer(): if the mode to be freed exists in fb_display[], set the corresponding mode pointer to NULL. Signed-off-by: Quanmin Yan Reviewed-by: Thomas Zimmermann Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- drivers/video/fbdev/core/fbcon.c | 19 +++++++++++++++++++ drivers/video/fbdev/core/fbmem.c | 1 + include/linux/fbcon.h | 2 ++ 3 files changed, 22 insertions(+) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 96cc9b389246..9bd3c3814b5c 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2810,6 +2810,25 @@ int fbcon_mode_deleted(struct fb_info *info, return found; } +static void fbcon_delete_mode(struct fb_videomode *m) +{ + struct fbcon_display *p; + + for (int i = first_fb_vc; i <= last_fb_vc; i++) { + p = &fb_display[i]; + if (p->mode == m) + p->mode = NULL; + } +} + +void fbcon_delete_modelist(struct list_head *head) +{ + struct fb_modelist *modelist; + + list_for_each_entry(modelist, head, list) + fbcon_delete_mode(&modelist->mode); +} + #ifdef CONFIG_VT_HW_CONSOLE_BINDING static void fbcon_unbind(void) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 53f1719b1ae1..eff757ebbed1 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -544,6 +544,7 @@ static void do_unregister_framebuffer(struct fb_info *fb_info) fb_info->pixmap.addr = NULL; } + fbcon_delete_modelist(&fb_info->modelist); fb_destroy_modelist(&fb_info->modelist); registered_fb[fb_info->node] = NULL; num_registered_fb--; diff --git a/include/linux/fbcon.h b/include/linux/fbcon.h index 81f0e698acbf..f206370060e1 100644 --- a/include/linux/fbcon.h +++ b/include/linux/fbcon.h @@ -18,6 +18,7 @@ void fbcon_suspended(struct fb_info *info); void fbcon_resumed(struct fb_info *info); int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode); +void fbcon_delete_modelist(struct list_head *head); void fbcon_new_modelist(struct fb_info *info); void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps); @@ -38,6 +39,7 @@ static inline void fbcon_suspended(struct fb_info *info) {} static inline void fbcon_resumed(struct fb_info *info) {} static inline int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { return 0; } +static inline void fbcon_delete_modelist(struct list_head *head) {} static inline void fbcon_new_modelist(struct fb_info *info) {} static inline void fbcon_get_requirement(struct fb_info *info, struct fb_blit_caps *caps) {} From 7073c7fc8d8ba47194e5fc58fcafc0efe7586e9b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Fri, 24 Oct 2025 18:37:15 +0900 Subject: [PATCH 0504/1075] fbdev: atyfb: Check if pll_ops->init_pll failed Actually check the return value from pll_ops->init_pll() as it can return an error. If the card's BIOS didn't run because it's not the primary VGA card the fact that the xclk source is unsupported is printed as shown below but the driver continues on regardless and on my machine causes a hard lock up. [ 61.470088] atyfb 0000:03:05.0: enabling device (0080 -> 0083) [ 61.476191] atyfb: using auxiliary register aperture [ 61.481239] atyfb: 3D RAGE XL (Mach64 GR, PCI-33) [0x4752 rev 0x27] [ 61.487569] atyfb: 512K SGRAM (1:1), 14.31818 MHz XTAL, 230 MHz PLL, 83 Mhz MCLK, 63 MHz XCLK [ 61.496112] atyfb: Unsupported xclk source: 5. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniel Palmer Signed-off-by: Helge Deller Cc: stable@vger.kernel.org --- drivers/video/fbdev/aty/atyfb_base.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/aty/atyfb_base.c b/drivers/video/fbdev/aty/atyfb_base.c index 210fd3ac18a4..56ef1d88e003 100644 --- a/drivers/video/fbdev/aty/atyfb_base.c +++ b/drivers/video/fbdev/aty/atyfb_base.c @@ -2614,8 +2614,12 @@ static int aty_init(struct fb_info *info) pr_cont("\n"); } #endif - if (par->pll_ops->init_pll) - par->pll_ops->init_pll(info, &par->pll); + if (par->pll_ops->init_pll) { + ret = par->pll_ops->init_pll(info, &par->pll); + if (ret) + return ret; + } + if (par->pll_ops->resume_pll) par->pll_ops->resume_pll(info, &par->pll); From 93c97bc8d85d5742d6f000d8bf3eeeb705bc6082 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 27 Oct 2025 14:09:48 +0100 Subject: [PATCH 0505/1075] drm/msm: dsi: fix PLL init in bonded mode When in bonded DSI mode, only one PLL in one DSI PHY is used for both DSI PHYs, meaning that parents of the secondary DSI PHY will use the primary DSI PHY PLL as parent. In this case the primary DSI PHY PLL will be set even if the primary DSI PHY is not yet enabled. The DSI PHY code has support for this particular use-case and will handle the fact the PLL was already set when initializing the primary DSI PHY. By introducing a protected variable pll_enable_cnt in the commit cb55f39bf7b1 ("drm/msm/dsi/phy: Fix reading zero as PLL rates when unprepared"), this variable is only initially set to 1 when the DSI PHY is initialized making it impossible to set the PLL before, breaking the bonded DSI use case by returning 0 when setting the PLL from the secondary DSI PHY driver and skipping the correct clocks initialization. But since it was already possible to set the PLL without enabling the DSI PHY, just drop the pll_enable_cnt setting from the PHY enable/disable and simply increment/decrement the pll_enable_cnt variable from the dsi_pll_enable/disable_pll_bias to make sure any PLL operation is done with the PLL BIAS enabled. Fixes: cb55f39bf7b1 ("drm/msm/dsi/phy: Fix reading zero as PLL rates when unprepared") Closes: https://lore.kernel.org/all/50a49d72-2b1e-471d-b0c4-d5a0b38b2a21@linaro.org/ Tested-by: Krzysztof Kozlowski Signed-off-by: Neil Armstrong Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/683688/ Link: https://lore.kernel.org/r/20251027-topic-sm8x50-fix-dsi-bonded-v1-1-a477cd3f907d@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.h | 1 - drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c | 18 ++---------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index e391505fdaf0..3cbf08231492 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -109,7 +109,6 @@ struct msm_dsi_phy { struct msm_dsi_dphy_timing timing; const struct msm_dsi_phy_cfg *cfg; void *tuning_cfg; - void *pll_data; enum msm_dsi_phy_usecase usecase; bool regulator_ldo_mode; diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 32f06edd21a9..c5e1d2016bcc 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -426,11 +426,8 @@ static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) u32 data; spin_lock_irqsave(&pll->pll_enable_lock, flags); - if (pll->pll_enable_cnt++) { - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); - WARN_ON(pll->pll_enable_cnt == INT_MAX); - return; - } + pll->pll_enable_cnt++; + WARN_ON(pll->pll_enable_cnt == INT_MAX); data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_0); data |= DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; @@ -876,7 +873,6 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) spin_lock_init(&pll_7nm->pll_enable_lock); pll_7nm->phy = phy; - phy->pll_data = pll_7nm; ret = pll_7nm_register(pll_7nm, phy->provided_clocks->hws); if (ret) { @@ -965,10 +961,8 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, u32 const delay_us = 5; u32 const timeout_us = 1000; struct msm_dsi_dphy_timing *timing = &phy->timing; - struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; bool less_than_1500_mhz; - unsigned long flags; u32 vreg_ctrl_0, vreg_ctrl_1, lane_ctrl0; u32 glbl_pemph_ctrl_0; u32 glbl_str_swi_cal_sel_ctrl, glbl_hstx_str_ctrl_0; @@ -1090,13 +1084,10 @@ static int dsi_7nm_phy_enable(struct msm_dsi_phy *phy, glbl_rescode_bot_ctrl = 0x3c; } - spin_lock_irqsave(&pll->pll_enable_lock, flags); - pll->pll_enable_cnt = 1; /* de-assert digital and pll power down */ data = DSI_7nm_PHY_CMN_CTRL_0_DIGTOP_PWRDN_B | DSI_7nm_PHY_CMN_CTRL_0_PLL_SHUTDOWNB; writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* Assert PLL core reset */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_PLL_CNTRL); @@ -1209,9 +1200,7 @@ static bool dsi_7nm_set_continuous_clock(struct msm_dsi_phy *phy, bool enable) static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) { - struct dsi_pll_7nm *pll = phy->pll_data; void __iomem *base = phy->base; - unsigned long flags; u32 data; DBG(""); @@ -1238,11 +1227,8 @@ static void dsi_7nm_phy_disable(struct msm_dsi_phy *phy) writel(data, base + REG_DSI_7nm_PHY_CMN_CTRL_0); writel(0, base + REG_DSI_7nm_PHY_CMN_LANE_CTRL0); - spin_lock_irqsave(&pll->pll_enable_lock, flags); - pll->pll_enable_cnt = 0; /* Turn off all PHY blocks */ writel(0x00, base + REG_DSI_7nm_PHY_CMN_CTRL_0); - spin_unlock_irqrestore(&pll->pll_enable_lock, flags); /* make sure phy is turned off */ wmb(); From 2319551e97f0cd9b4a7b78c1a6970aa4b785000b Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 17 Oct 2025 19:58:35 +0000 Subject: [PATCH 0506/1075] drm/msm/dpu: Fix allocation of RGB SSPPs without scaling Due to condition in dpu_rm_reserve_sspp, RGB SSPPs are only tried when scaling is requested, which prevents those SSPPs from being reserved if we don't need scaling at all. Instead we should check if YUV support is requested, since scaling on RGB SSPPs is optional and is not implemented in driver yet. Fixes: 774bcfb73176 ("drm/msm/dpu: add support for virtual planes") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/681914/ Link: https://lore.kernel.org/r/20251017-b4-dpu-fixes-v1-1-40ce5993eeb6@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index 2c77c74fac0f..d9c3b0a1d091 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -842,7 +842,7 @@ struct dpu_hw_sspp *dpu_rm_reserve_sspp(struct dpu_rm *rm, if (!reqs->scale && !reqs->yuv) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_DMA); - if (!hw_sspp && reqs->scale) + if (!hw_sspp && !reqs->yuv) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_RGB); if (!hw_sspp) hw_sspp = dpu_rm_try_sspp(rm, global_state, crtc, reqs, SSPP_TYPE_VIG); From 23ab0d6228bf9de6cf69db330f95a1938b276693 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 17 Oct 2025 19:58:36 +0000 Subject: [PATCH 0507/1075] drm/msm/dpu: Propagate error from dpu_assign_plane_resources The dpu_plane_virtual_assign_resources function might fail if there is no suitable SSPP(s) for the plane. This leaves sspp field in plane state uninitialized and later leads to NULL dereference during commit: Call trace: _dpu_crtc_blend_setup+0x194/0x620 [msm] (P) dpu_crtc_atomic_begin+0xe4/0x240 [msm] drm_atomic_helper_commit_planes+0x88/0x358 msm_atomic_commit_tail+0x1b4/0x8b8 [msm] commit_tail+0xa8/0x1b0 drm_atomic_helper_commit+0x180/0x1a0 drm_atomic_commit+0x94/0xe0 drm_mode_atomic_ioctl+0xa88/0xd60 drm_ioctl_kernel+0xc4/0x138 drm_ioctl+0x364/0x4f0 __arm64_sys_ioctl+0xac/0x108 invoke_syscall.constprop.0+0x48/0x100 el0_svc_common.constprop.0+0x40/0xe8 do_el0_svc+0x24/0x38 el0_svc+0x30/0xe0 el0t_64_sync_handler+0xa0/0xe8 el0t_64_sync+0x198/0x1a0 Fixes: 3ed12a3664b3 ("drm/msm/dpu: allow sharing SSPP between planes") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/681916/ Link: https://lore.kernel.org/r/20251017-b4-dpu-fixes-v1-2-40ce5993eeb6@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index f54cf0faa1c7..d198a65a2c5f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1278,7 +1278,7 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, state, plane_state, prev_adjacent_plane_state); if (ret) - break; + return ret; prev_adjacent_plane_state = plane_state; } From 425da3305972a7bab9812770d44e2f7f97f8bfd6 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 17 Oct 2025 19:58:37 +0000 Subject: [PATCH 0508/1075] drm/msm/dpu: Disable scaling for unsupported scaler types Scaling is not implemented for some type of scalers (QSEED2 and RGB) but it was unintentionally re-enabled with change below. The remaining condition in dpu_plane_atomic_check_pipe is not enough because it only checks for length of scaler block (which is present). This patch adds a additional check for setup_scaler operation. Fixes: 8f15005783b8 ("drm/msm/dpu: move scaling limitations out of the hw_catalog") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/681918/ Link: https://lore.kernel.org/r/20251017-b4-dpu-fixes-v1-3-40ce5993eeb6@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index d198a65a2c5f..6effe0fa4837 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -740,7 +740,7 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, * We already have verified scaling against platform limitations. * Now check if the SSPP supports scaling at all. */ - if (!sblk->scaler_blk.len && + if (!(sblk->scaler_blk.len && pipe->sspp->ops.setup_scaler) && ((drm_rect_width(&new_plane_state->src) >> 16 != drm_rect_width(&new_plane_state->dst)) || (drm_rect_height(&new_plane_state->src) >> 16 != From 2f8bed9175429294860276b897de53d49ed647d8 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 17 Oct 2025 19:58:38 +0000 Subject: [PATCH 0509/1075] drm/msm/dpu: Fix pixel extension sub-sampling In _dpu_plane_setup_pixel_ext function instead of dividing just chroma source resolution once (component 1 and 2), second component is divided once more because src_w and src_h variable is reused between iterations. Third component receives wrong source resolution too (from component 2). To fix this introduce temporary variables for each iteration. Fixes: dabfdd89eaa9 ("drm/msm/disp/dpu1: add inline rotation support for sc7280") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/681921/ Link: https://lore.kernel.org/r/20251017-b4-dpu-fixes-v1-4-40ce5993eeb6@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 6effe0fa4837..905524ceeb1f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -500,13 +500,15 @@ static void _dpu_plane_setup_pixel_ext(struct dpu_hw_scaler3_cfg *scale_cfg, int i; for (i = 0; i < DPU_MAX_PLANES; i++) { + uint32_t w = src_w, h = src_h; + if (i == DPU_SSPP_COMP_1_2 || i == DPU_SSPP_COMP_2) { - src_w /= chroma_subsmpl_h; - src_h /= chroma_subsmpl_v; + w /= chroma_subsmpl_h; + h /= chroma_subsmpl_v; } - pixel_ext->num_ext_pxls_top[i] = src_h; - pixel_ext->num_ext_pxls_left[i] = src_w; + pixel_ext->num_ext_pxls_top[i] = h; + pixel_ext->num_ext_pxls_left[i] = w; } } From 5e0656b125346d9a1ef65e72c6d3196b8ea6b1e1 Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Fri, 17 Oct 2025 19:58:39 +0000 Subject: [PATCH 0510/1075] drm/msm/dpu: Require linear modifier for writeback framebuffers UBWC-related register configuration for writeback is not implemented in the driver yet but there aren't any checks for non-linear modifiers in atomic_check. Thus when compressed framebuffer is attached to writeback connector it will be filled with linear image data. This patch forbids non-linear modifiers for writeback framebuffers until UBWC support for writeback is properly implemented. Fixes: 71174f362d67 ("drm/msm/dpu: move writeback's atomic_check to dpu_writeback.c") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/681922/ Link: https://lore.kernel.org/r/20251017-b4-dpu-fixes-v1-5-40ce5993eeb6@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c index cd73468e369a..7545c0293efb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_writeback.c @@ -72,6 +72,9 @@ static int dpu_wb_conn_atomic_check(struct drm_connector *connector, DPU_ERROR("invalid fb w=%d, maxlinewidth=%u\n", fb->width, dpu_wb_conn->maxlinewidth); return -EINVAL; + } else if (fb->modifier != DRM_FORMAT_MOD_LINEAR) { + DPU_ERROR("unsupported fb modifier:%#llx\n", fb->modifier); + return -EINVAL; } return drm_atomic_helper_check_wb_connector_state(conn_state->connector, conn_state->state); From bbc65d1bde821750c48ac075057be548e38d77cc Mon Sep 17 00:00:00 2001 From: Vladimir Lypak Date: Sat, 18 Oct 2025 14:33:43 +0000 Subject: [PATCH 0511/1075] drm/msm/dpu: Disable broken YUV on QSEED2 hardware YUV formats on this hardware needs scaling for chroma planes. However it is not implemented for QSEED2 which breaks display pipeline if YUV format is used (causing partial and corrupted output with PPDONE timeouts). This patch temporarily disables YUV by switching affected sub-block to RGB only format list. Fixes: daf9a92daeb8 ("drm/msm/dpu: Add support for MSM8996") Signed-off-by: Vladimir Lypak Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/682061/ Link: https://lore.kernel.org/r/20251018-b4-dpu-fixes-v1-6-1852278064d0@gmail.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 6641455c4ec6..9f8d1bba9139 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -267,8 +267,8 @@ static const u32 wb2_formats_rgb_yuv[] = { .base = 0x200, .len = 0xa0,}, \ .csc_blk = {.name = "csc", \ .base = 0x320, .len = 0x100,}, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .rotation_cfg = NULL, \ } From f5d079564c44baaeedf5e25f4b943aa042ea0eb1 Mon Sep 17 00:00:00 2001 From: Jessica Zhang Date: Tue, 23 Sep 2025 16:03:50 -0700 Subject: [PATCH 0512/1075] drm/msm/dpu: Fix adjusted mode clock check for 3d merge Since 3D merge allows for larger modes to be supported across 2 layer mixers, filter modes based on adjusted mode clock / 2 when 3d merge is supported. Reported-by: Abel Vesa Fixes: 62b7d6835288 ("drm/msm/dpu: Filter modes based on adjusted mode clock") Signed-off-by: Jessica Zhang Reviewed-by: Dmitry Baryshkov Reviewed-by: Abel Vesa Tested-by: Abel Vesa Tested-by: Krzysztof Kozlowski Patchwork: https://patchwork.freedesktop.org/patch/676353/ Link: https://lore.kernel.org/r/20250923-modeclk-fix-v2-1-01fcd0b2465a@oss.qualcomm.com Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 4b970a59deaf..2f8156051d9b 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1545,6 +1545,9 @@ static enum drm_mode_status dpu_crtc_mode_valid(struct drm_crtc *crtc, adjusted_mode_clk = dpu_core_perf_adjusted_mode_clk(mode->clock, dpu_kms->perf.perf_cfg); + if (dpu_kms->catalog->caps->has_3d_merge) + adjusted_mode_clk /= 2; + /* * The given mode, adjusted for the perf clock factor, should not exceed * the max core clock rate From 36fedc44e37e811f25666c600bce4bc027290226 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 21:07:33 +0200 Subject: [PATCH 0513/1075] dpll: fix device-id-get and pin-id-get to return errors properly The device-id-get and pin-id-get handlers were ignoring errors from the find functions and sending empty replies instead of returning error codes to userspace. When dpll_device_find_from_nlattr() or dpll_pin_find_from_nlattr() returned an error (e.g., -EINVAL for "multiple matches" or -ENODEV for "not found"), the handlers checked `if (!IS_ERR(ptr))` and skipped adding the device/pin handle to the message, but then still sent the empty message as a successful reply. This caused userspace tools to receive empty responses with id=0 instead of proper netlink errors with extack messages like "multiple matches". The bug is visible via strace, which shows the kernel sending TWO netlink messages in response to a single request: 1. Empty reply (20 bytes, just header, no attributes): recvfrom(3, [{nlmsg_len=20, nlmsg_type=dpll, nlmsg_flags=0, ...}, {cmd=0x7, version=1}], ...) 2. NLMSG_ERROR ACK with extack (because of NLM_F_ACK flag): recvfrom(3, [{nlmsg_len=60, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED|NLM_F_ACK_TLVS, ...}, [{error=0, msg={...}}, [{nla_type=NLMSGERR_ATTR_MSG}, "multiple matches"]]], ...) The C YNL library parses the first message, sees an empty response, and creates a result object with calloc() which zero-initializes all fields, resulting in id=0. The Python YNL library parses both messages and displays the extack from the second NLMSG_ERROR message. Fix by checking `if (IS_ERR(ptr))` first and returning the error code immediately, so that netlink properly sends only NLMSG_ERROR with the extack message to userspace. After this fix, both C and Python YNL tools receive only the NLMSG_ERROR and behave consistently. This affects: - DPLL_CMD_DEVICE_ID_GET: now properly returns error when multiple devices match the criteria (e.g., same module-name + clock-id) - DPLL_CMD_PIN_ID_GET: now properly returns error when multiple pins match the criteria (e.g., same module-name) Before fix: $ dpll pin id-get module-name ice 0 (wrong - should be error, there are 17 pins with module-name "ice") After fix: $ dpll pin id-get module-name ice Error: multiple matches (correct - kernel reports the ambiguity via extack) Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024190733.364101-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 74c1f0ca95f2..a4153bcb6dcf 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1559,16 +1559,18 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -EMSGSIZE; } pin = dpll_pin_find_from_nlattr(info); - if (!IS_ERR(pin)) { - if (!dpll_pin_available(pin)) { - nlmsg_free(msg); - return -ENODEV; - } - ret = dpll_msg_add_pin_handle(msg, pin); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(pin)) { + nlmsg_free(msg); + return PTR_ERR(pin); + } + if (!dpll_pin_available(pin)) { + nlmsg_free(msg); + return -ENODEV; + } + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); @@ -1735,12 +1737,14 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) } dpll = dpll_device_find_from_nlattr(info); - if (!IS_ERR(dpll)) { - ret = dpll_msg_add_dev_handle(msg, dpll); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(dpll)) { + nlmsg_free(msg); + return PTR_ERR(dpll); + } + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); From d8d2b1f81530988abe2e2bfaceec1c5d30b9a0b4 Mon Sep 17 00:00:00 2001 From: Pavel Zhigulin Date: Fri, 24 Oct 2025 19:13:02 +0300 Subject: [PATCH 0514/1075] net: cxgb4/ch_ipsec: fix potential use-after-free in ch_ipsec_xfrm_add_state() callback In ch_ipsec_xfrm_add_state() there is not check of try_module_get return value. It is very unlikely, but try_module_get() could return false value, which could cause use-after-free error. Conditions: The module count must be zero, and a module unload in progress. The thread doing the unload is blocked somewhere. Another thread makes a callback into the module for some request that (for instance) would need to create a kernel thread. It tries to get a reference for the thread. So try_module_get(THIS_MODULE) is the right call - and will fail here. This fix adds checking the result of try_module_get call Fixes: 6dad4e8ab3ec ("chcr: Add support for Inline IPSec") Signed-off-by: Pavel Zhigulin Link: https://patch.msgid.link/20251024161304.724436-1-Pavel.Zhigulin@kaspersky.com Signed-off-by: Jakub Kicinski --- .../ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index ecd9a0bd5e18..49b57bb5fac1 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -290,9 +290,15 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, return -EINVAL; } + if (unlikely(!try_module_get(THIS_MODULE))) { + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire module reference"); + return -ENODEV; + } + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); if (!sa_entry) { res = -ENOMEM; + module_put(THIS_MODULE); goto out; } @@ -301,7 +307,6 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, sa_entry->esn = 1; ch_ipsec_setkey(x, sa_entry); x->xso.offload_handle = (unsigned long)sa_entry; - try_module_get(THIS_MODULE); out: return res; } From 40c17a02de41f12dd713309c7d2546117c577d29 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 27 Oct 2025 15:09:12 +0100 Subject: [PATCH 0515/1075] dpll: zl3073x: Fix output pin registration Currently, the signal format of an associated output is not considered during output pin registration. As a result, the driver registers output pins that are disabled by the signal format configuration. Fix this by calling zl3073x_output_pin_is_enabled() to check whether a given output pin should be registered or not. Fixes: 75a71ecc2412 ("dpll: zl3073x: Register DPLL devices and pins") Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20251027140912.233152-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 93dc93eec79e..f93f9a458324 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1904,7 +1904,7 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, } is_diff = zl3073x_out_is_diff(zldev, out); - is_enabled = zl3073x_out_is_enabled(zldev, out); + is_enabled = zl3073x_output_pin_is_enabled(zldev, index); } /* Skip N-pin if the corresponding input/output is differential */ From 12d2303db892d397373e2af40758cbd97309ec37 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:40 +0800 Subject: [PATCH 0516/1075] net: hibmcge: fix rx buf avl irq is not re-enabled in irq_handle issue irq initialized with the macro HBG_ERR_IRQ_I will automatically be re-enabled, whereas those initialized with the macro HBG_IRQ_I will not be re-enabled. Since the rx buf avl irq is initialized using the macro HBG_IRQ_I, it needs to be actively re-enabled; otherwise priv->stats.rx_fifo_less_empty_thrsld_cnt cannot be correctly incremented. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251025014642.265259-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index 8af0bc4cca21..ae4cb35186d8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -32,6 +32,7 @@ static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; + hbg_hw_irq_enable(priv, irq_info->mask, true); } #define HBG_IRQ_I(name, handle) \ From 71eb8d1e07562b43bebc0c2721699814b43fd83d Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:41 +0800 Subject: [PATCH 0517/1075] net: hibmcge: remove unnecessary check for np_link_fail in scenarios without phy. hibmcge driver uses fixed_phy to configure scenarios without PHY, where the driver is always in a linked state. However, there might be no link in hardware, so the np_link error is detected in hbg_hw_adjust_link(), which can cause abnormal logs. Therefore, in scenarios without a PHY, the driver no longer checks the np_link status. Fixes: 1d7cd7a9c69c ("net: hibmcge: support scenario without PHY") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h | 1 + drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 3 +++ drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index ea09a09c451b..2097e4c2b3d7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -17,6 +17,7 @@ #define HBG_PCU_CACHE_LINE_SIZE 32 #define HBG_TX_TIMEOUT_BUF_LEN 1024 #define HBG_RX_DESCR 0x01 +#define HBG_NO_PHY 0xFF #define HBG_PACKET_HEAD_SIZE ((HBG_RX_SKIP1 + HBG_RX_SKIP2 + \ HBG_RX_DESCR) * HBG_PCU_CACHE_LINE_SIZE) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index d0aa0661ecd4..d6e8ce8e351a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -244,6 +244,9 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); + if (priv->mac.phy_addr == HBG_NO_PHY) + return; + /* wait MAC link up */ ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, link_status, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 37791de47f6f..b6f0a2780ea8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -20,7 +20,6 @@ #define HBG_MDIO_OP_INTERVAL_US (5 * 1000) #define HBG_NP_LINK_FAIL_RETRY_TIMES 5 -#define HBG_NO_PHY 0xFF static void hbg_mdio_set_command(struct hbg_mac *mac, u32 cmd) { From 7e2958aee59ceb30ef153387741d12385b712250 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:42 +0800 Subject: [PATCH 0518/1075] net: hibmcge: fix the inappropriate netif_device_detach() current, driver will call netif_device_detach() in pci_error_handlers.error_detected() and do reset in pci_error_handlers.slot_reset(). However, if pci_error_handlers.slot_reset() is not called after pci_error_handlers.error_detected(), driver will be detached and unable to recover. drivers/pci/pcie/err.c/report_error_detected() says: If any device in the subtree does not have an error_detected callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent error callbacks of any device in the subtree, and will exit in the disconnected error state. Therefore, when the hibmcge device and other devices that do not support the error_detected callback are under the same subtree, hibmcge will be unable to do slot_reset even for non-fatal errors. This path move netif_device_detach() from error_detected() to slot_reset(), ensuring that detach and reset are always executed together. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 83cf75bf7a17..e11495b7ee98 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -136,12 +136,11 @@ static pci_ers_result_t hbg_pci_err_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + netif_device_detach(netdev); return PCI_ERS_RESULT_DISCONNECT; + } - pci_disable_device(pdev); return PCI_ERS_RESULT_NEED_RESET; } @@ -150,6 +149,9 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); + netif_device_detach(netdev); + pci_disable_device(pdev); + if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "failed to re-enable PCI device after reset\n"); From 23ee8a2563a0f24cf4964685ced23c32be444ab8 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Tue, 28 Oct 2025 20:08:59 +0800 Subject: [PATCH 0519/1075] dma-mapping: benchmark: Restore padding to ensure uABI remained consistent The padding field in the structure was previously reserved to maintain a stable interface for potential new fields, ensuring compatibility with user-space shared data structures. However,it was accidentally removed by tiantao in a prior commit, which may lead to incompatibility between user space and the kernel. This patch reinstates the padding to restore the original structure layout and preserve compatibility. Fixes: 8ddde07a3d28 ("dma-mapping: benchmark: extract a common header file for map_benchmark definition") Cc: stable@vger.kernel.org Acked-by: Barry Song Signed-off-by: Qinxin Xia Reported-by: Barry Song Closes: https://lore.kernel.org/lkml/CAGsJ_4waiZ2+NBJG+SCnbNk+nQ_ZF13_Q5FHJqZyxyJTcEop2A@mail.gmail.com/ Reviewed-by: Jonathan Cameron Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251028120900.2265511-2-xiaqinxin@huawei.com --- include/linux/map_benchmark.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h index 62674c83bde4..48e2ff95332f 100644 --- a/include/linux/map_benchmark.h +++ b/include/linux/map_benchmark.h @@ -27,5 +27,6 @@ struct map_benchmark { __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; #endif /* _KERNEL_DMA_BENCHMARK_H */ From 0ba6502ce167fc3d598c08c2cc3b4ed7ca5aa251 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 28 Oct 2025 14:42:14 +0800 Subject: [PATCH 0520/1075] perf/x86/intel: Fix KASAN global-out-of-bounds warning When running "perf mem record" command on CWF, the below KASAN global-out-of-bounds warning is seen. ================================================================== BUG: KASAN: global-out-of-bounds in cmt_latency_data+0x176/0x1b0 Read of size 4 at addr ffffffffb721d000 by task dtlb/9850 Call Trace: kasan_report+0xb8/0xf0 cmt_latency_data+0x176/0x1b0 setup_arch_pebs_sample_data+0xf49/0x2560 intel_pmu_drain_arch_pebs+0x577/0xb00 handle_pmi_common+0x6c4/0xc80 The issue is caused by below code in __grt_latency_data(). The code tries to access x86_hybrid_pmu structure which doesn't exist on non-hybrid platform like CWF. WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big) So add is_hybrid() check before calling this WARN_ON_ONCE to fix the global-out-of-bounds access issue. Fixes: 090262439f66 ("perf/x86/intel: Rename model-specific pebs_latency_data functions") Reported-by: Xudong Hao Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Zide Chen Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251028064214.1451968-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/ds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index c0b7ac1c7594..01bc59e9286c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -317,7 +317,8 @@ static u64 __grt_latency_data(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); + WARN_ON_ONCE(is_hybrid() && + hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; From b796a8feb7cb094ee998931a96cd6152a9d3022e Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Mon, 8 Sep 2025 14:16:38 +0800 Subject: [PATCH 0521/1075] perf/x86/intel: Add PMU support for WildcatLake WildcatLake is a variant of PantherLake and shares same PMU features, so directly reuse Pantherlake's code to enable PMU features for WildcatLake. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Zide Chen Link: https://patch.msgid.link/20250908061639.938105-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 28f5468a6ea3..fe65be0b9d9c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7596,6 +7596,7 @@ __init int intel_pmu_init(void) break; case INTEL_PANTHERLAKE_L: + case INTEL_WILDCATLAKE_L: pr_cont("Pantherlake Hybrid events, "); name = "pantherlake_hybrid"; goto lnl_common; From f4c12e5cefc8ec2eda93bc17ea734407228449ab Mon Sep 17 00:00:00 2001 From: dongsheng Date: Mon, 8 Sep 2025 14:16:39 +0800 Subject: [PATCH 0522/1075] perf/x86/intel/uncore: Add uncore PMU support for Wildcat Lake WildcatLake (WCL) is a variant of PantherLake (PTL) and shares the same uncore PMU features with PTL. Therefore, directly reuse Pantherlake's uncore PMU enabling code for WildcatLake. Signed-off-by: dongsheng Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20250908061639.938105-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/uncore.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index a762f7f5b161..d6c945cc5d07 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1895,6 +1895,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_VFM(INTEL_ARROWLAKE_H, &mtl_uncore_init), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_uncore_init), X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &ptl_uncore_init), + X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &ptl_uncore_init), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_VFM(INTEL_GRANITERAPIDS_X, &gnr_uncore_init), From 330e2c514823008b22e6afd2055715bc46dd8d55 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Oct 2025 19:48:32 +0100 Subject: [PATCH 0523/1075] afs: Fix dynamic lookup to fail on cell lookup failure When a process tries to access an entry in /afs, normally what happens is that an automount dentry is created by ->lookup() and then triggered, which jumps through the ->d_automount() op. Currently, afs_dynroot_lookup() does not do cell DNS lookup, leaving that to afs_d_automount() to perform - however, it is possible to use access() or stat() on the automount point, which will always return successfully, have briefly created an afs_cell record if one did not already exist. This means that something like: test -d "/afs/.west" && echo Directory exists will print "Directory exists" even though no such cell is configured. This breaks the "west" python module available on PIP as it expects this access to fail. Now, it could be possible to make afs_dynroot_lookup() perform the DNS[*] lookup, but that would make "ls --color /afs" do this for each cell in /afs that is listed but not yet probed. kafs-client, probably wrongly, preloads the entire cell database and all the known cells are then listed in /afs - and doing ls /afs would be very, very slow, especially if any cell supplied addresses but was wholly inaccessible. [*] When I say "DNS", actually read getaddrinfo(), which could use any one of a host of mechanisms. Could also use static configuration. To fix this, make the following changes: (1) Create an enum to specify the origination point of a call to afs_lookup_cell() and pass this value into that function in place of the "excl" parameter (which can be derived from it). There are six points of origination: - Cell preload through /proc/net/afs/cells - Root cell config through /proc/net/afs/rootcell - Lookup in dynamic root - Automount trigger - Direct mount with mount() syscall - Alias check where YFS tells us the cell name is different (2) Add an extra state into the afs_cell state machine to indicate a cell that's been initialised, but not yet looked up. This is separate from one that can be considered active and has been looked up at least once. (3) Make afs_lookup_cell() vary its behaviour more, depending on where it was called from: If called from preload or root cell config, DNS lookup will not happen until we definitely want to use the cell (dynroot mount, automount, direct mount or alias check). The cell will appear in /afs but stat() won't trigger DNS lookup. If the cell already exists, dynroot will not wait for the DNS lookup to complete. If the cell did not already exist, dynroot will wait. If called from automount, direct mount or alias check, it will wait for the DNS lookup to complete. (4) Make afs_lookup_cell() return an error if lookup failed in one way or another. We try to return -ENOENT if the DNS says the cell does not exist and -EDESTADDRREQ if we couldn't access the DNS. Reported-by: Markus Suvanto Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220685 Signed-off-by: David Howells Link: https://patch.msgid.link/1784747.1761158912@warthog.procyon.org.uk Fixes: 1d0b929fc070 ("afs: Change dynroot to create contents on demand") Tested-by: Markus Suvanto cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Christian Brauner --- fs/afs/cell.c | 78 +++++++++++++++++++++++++++++++++++++++-------- fs/afs/dynroot.c | 3 +- fs/afs/internal.h | 12 +++++++- fs/afs/mntpt.c | 3 +- fs/afs/proc.c | 3 +- fs/afs/super.c | 2 +- fs/afs/vl_alias.c | 3 +- 7 files changed, 86 insertions(+), 18 deletions(-) diff --git a/fs/afs/cell.c b/fs/afs/cell.c index f31359922e98..d9b6fa1088b7 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -229,7 +229,7 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, * @name: The name of the cell. * @namesz: The strlen of the cell name. * @vllist: A colon/comma separated list of numeric IP addresses or NULL. - * @excl: T if an error should be given if the cell name already exists. + * @reason: The reason we're doing the lookup * @trace: The reason to be logged if the lookup is successful. * * Look up a cell record by name and query the DNS for VL server addresses if @@ -239,7 +239,8 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, */ struct afs_cell *afs_lookup_cell(struct afs_net *net, const char *name, unsigned int namesz, - const char *vllist, bool excl, + const char *vllist, + enum afs_lookup_cell_for reason, enum afs_cell_trace trace) { struct afs_cell *cell, *candidate, *cursor; @@ -247,12 +248,18 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, enum afs_cell_state state; int ret, n; - _enter("%s,%s", name, vllist); + _enter("%s,%s,%u", name, vllist, reason); - if (!excl) { + if (reason != AFS_LOOKUP_CELL_PRELOAD) { cell = afs_find_cell(net, name, namesz, trace); - if (!IS_ERR(cell)) + if (!IS_ERR(cell)) { + if (reason == AFS_LOOKUP_CELL_DYNROOT) + goto no_wait; + if (cell->state == AFS_CELL_SETTING_UP || + cell->state == AFS_CELL_UNLOOKED) + goto lookup_cell; goto wait_for_cell; + } } /* Assume we're probably going to create a cell and preallocate and @@ -298,26 +305,69 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, rb_insert_color(&cell->net_node, &net->cells); up_write(&net->cells_lock); - afs_queue_cell(cell, afs_cell_trace_queue_new); +lookup_cell: + if (reason != AFS_LOOKUP_CELL_PRELOAD && + reason != AFS_LOOKUP_CELL_ROOTCELL) { + set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags); + afs_queue_cell(cell, afs_cell_trace_queue_new); + } wait_for_cell: - _debug("wait_for_cell"); state = smp_load_acquire(&cell->state); /* vs error */ - if (state != AFS_CELL_ACTIVE && - state != AFS_CELL_DEAD) { + switch (state) { + case AFS_CELL_ACTIVE: + case AFS_CELL_DEAD: + break; + case AFS_CELL_UNLOOKED: + default: + if (reason == AFS_LOOKUP_CELL_PRELOAD || + reason == AFS_LOOKUP_CELL_ROOTCELL) + break; + _debug("wait_for_cell"); afs_see_cell(cell, afs_cell_trace_wait); wait_var_event(&cell->state, ({ state = smp_load_acquire(&cell->state); /* vs error */ state == AFS_CELL_ACTIVE || state == AFS_CELL_DEAD; })); + _debug("waited_for_cell %d %d", cell->state, cell->error); } +no_wait: /* Check the state obtained from the wait check. */ + state = smp_load_acquire(&cell->state); /* vs error */ if (state == AFS_CELL_DEAD) { ret = cell->error; goto error; } + if (state == AFS_CELL_ACTIVE) { + switch (cell->dns_status) { + case DNS_LOOKUP_NOT_DONE: + if (cell->dns_source == DNS_RECORD_FROM_CONFIG) { + ret = 0; + break; + } + fallthrough; + default: + ret = -EIO; + goto error; + case DNS_LOOKUP_GOOD: + case DNS_LOOKUP_GOOD_WITH_BAD: + ret = 0; + break; + case DNS_LOOKUP_GOT_NOT_FOUND: + ret = -ENOENT; + goto error; + case DNS_LOOKUP_BAD: + ret = -EREMOTEIO; + goto error; + case DNS_LOOKUP_GOT_LOCAL_FAILURE: + case DNS_LOOKUP_GOT_TEMP_FAILURE: + case DNS_LOOKUP_GOT_NS_FAILURE: + ret = -EDESTADDRREQ; + goto error; + } + } _leave(" = %p [cell]", cell); return cell; @@ -325,7 +375,7 @@ struct afs_cell *afs_lookup_cell(struct afs_net *net, cell_already_exists: _debug("cell exists"); cell = cursor; - if (excl) { + if (reason == AFS_LOOKUP_CELL_PRELOAD) { ret = -EEXIST; } else { afs_use_cell(cursor, trace); @@ -384,7 +434,8 @@ int afs_cell_init(struct afs_net *net, const char *rootcell) return -EINVAL; /* allocate a cell record for the root/workstation cell */ - new_root = afs_lookup_cell(net, rootcell, len, vllist, false, + new_root = afs_lookup_cell(net, rootcell, len, vllist, + AFS_LOOKUP_CELL_ROOTCELL, afs_cell_trace_use_lookup_ws); if (IS_ERR(new_root)) { _leave(" = %ld", PTR_ERR(new_root)); @@ -777,6 +828,7 @@ static bool afs_manage_cell(struct afs_cell *cell) switch (cell->state) { case AFS_CELL_SETTING_UP: goto set_up_cell; + case AFS_CELL_UNLOOKED: case AFS_CELL_ACTIVE: goto cell_is_active; case AFS_CELL_REMOVING: @@ -797,7 +849,7 @@ static bool afs_manage_cell(struct afs_cell *cell) goto remove_cell; } - afs_set_cell_state(cell, AFS_CELL_ACTIVE); + afs_set_cell_state(cell, AFS_CELL_UNLOOKED); cell_is_active: if (afs_has_cell_expired(cell, &next_manage)) @@ -807,6 +859,8 @@ static bool afs_manage_cell(struct afs_cell *cell) ret = afs_update_cell(cell); if (ret < 0) cell->error = ret; + if (cell->state == AFS_CELL_UNLOOKED) + afs_set_cell_state(cell, AFS_CELL_ACTIVE); } if (next_manage < TIME64_MAX && cell->net->live) { diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 8c6130789fde..dc9d29e3739e 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -108,7 +108,8 @@ static struct dentry *afs_dynroot_lookup_cell(struct inode *dir, struct dentry * dotted = true; } - cell = afs_lookup_cell(net, name, len, NULL, false, + cell = afs_lookup_cell(net, name, len, NULL, + AFS_LOOKUP_CELL_DYNROOT, afs_cell_trace_use_lookup_dynroot); if (IS_ERR(cell)) { ret = PTR_ERR(cell); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index a45ae5c2ef8a..b92f96f56767 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -343,6 +343,7 @@ extern const char afs_init_sysname[]; enum afs_cell_state { AFS_CELL_SETTING_UP, + AFS_CELL_UNLOOKED, AFS_CELL_ACTIVE, AFS_CELL_REMOVING, AFS_CELL_DEAD, @@ -1049,9 +1050,18 @@ static inline bool afs_cb_is_broken(unsigned int cb_break, extern int afs_cell_init(struct afs_net *, const char *); extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned, enum afs_cell_trace); +enum afs_lookup_cell_for { + AFS_LOOKUP_CELL_DYNROOT, + AFS_LOOKUP_CELL_MOUNTPOINT, + AFS_LOOKUP_CELL_DIRECT_MOUNT, + AFS_LOOKUP_CELL_PRELOAD, + AFS_LOOKUP_CELL_ROOTCELL, + AFS_LOOKUP_CELL_ALIAS_CHECK, +}; struct afs_cell *afs_lookup_cell(struct afs_net *net, const char *name, unsigned int namesz, - const char *vllist, bool excl, + const char *vllist, + enum afs_lookup_cell_for reason, enum afs_cell_trace trace); extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace); void afs_unuse_cell(struct afs_cell *cell, enum afs_cell_trace reason); diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 1ad048e6e164..57c204a3c04e 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -107,7 +107,8 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) if (size > AFS_MAXCELLNAME) return -ENAMETOOLONG; - cell = afs_lookup_cell(ctx->net, p, size, NULL, false, + cell = afs_lookup_cell(ctx->net, p, size, NULL, + AFS_LOOKUP_CELL_MOUNTPOINT, afs_cell_trace_use_lookup_mntpt); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt); diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 40e879c8ca77..44520549b509 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -122,7 +122,8 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size) if (strcmp(buf, "add") == 0) { struct afs_cell *cell; - cell = afs_lookup_cell(net, name, strlen(name), args, true, + cell = afs_lookup_cell(net, name, strlen(name), args, + AFS_LOOKUP_CELL_PRELOAD, afs_cell_trace_use_lookup_add); if (IS_ERR(cell)) { ret = PTR_ERR(cell); diff --git a/fs/afs/super.c b/fs/afs/super.c index da407f2d6f0d..d672b7ab57ae 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -290,7 +290,7 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) /* lookup the cell record */ if (cellname) { cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, - NULL, false, + NULL, AFS_LOOKUP_CELL_DIRECT_MOUNT, afs_cell_trace_use_lookup_mount); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%*.*s'\n", diff --git a/fs/afs/vl_alias.c b/fs/afs/vl_alias.c index 709b4cdb723e..fc9676abd252 100644 --- a/fs/afs/vl_alias.c +++ b/fs/afs/vl_alias.c @@ -269,7 +269,8 @@ static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key) if (!name_len || name_len > AFS_MAXCELLNAME) master = ERR_PTR(-EOPNOTSUPP); else - master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false, + master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, + AFS_LOOKUP_CELL_ALIAS_CHECK, afs_cell_trace_use_lookup_canonical); kfree(cell_name); if (IS_ERR(master)) From e7dbfe6f15b4df34bb169d180bd10f1a3c043814 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 29 Oct 2025 07:50:20 +0100 Subject: [PATCH 0524/1075] spi: intel: Add support for Oak Stream SPI serial flash Add Oak Stream PCI ID to the driver list of supported devices. This patch was originally written by Zeng Guang. Signed-off-by: Heikki Krogerus Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251029065020.2920213-1-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 7765fb27c37c..b8c572394aac 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -80,6 +80,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x5825), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7723), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, From 07ad45e06b4039adf96882aefcb1d3299fb7c305 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 23:08:38 +0800 Subject: [PATCH 0525/1075] s390/mm: Fix memory leak in add_marker() when kvrealloc() fails The function has a memory leak when kvrealloc() fails. The function directly assigns NULL to the markers pointer, losing the reference to the previously allocated memory. This causes kvfree() in pt_dump_init() to free NULL instead of the leaked memory. Fix by: 1. Using kvrealloc() uniformly for all allocations 2. Using a temporary variable to preserve the original pointer until allocation succeeds 3. Removing the error path that sets markers_cnt=0 to keep consistency between markers and markers_cnt Found via static analysis and this is similar to commit 42378a9ca553 ("bpf, verifier: Fix memory leak in array reallocation for stack state") Fixes: d0e7915d2ad3 ("s390/mm/ptdump: Generate address marker array dynamically") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Signed-off-by: Heiko Carstens --- arch/s390/mm/dump_pagetables.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 9af2aae0a515..528d7c70979f 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -291,16 +291,14 @@ static int ptdump_cmp(const void *a, const void *b) static int add_marker(unsigned long start, unsigned long end, const char *name) { - size_t oldsize, newsize; + struct addr_marker *new; + size_t newsize; - oldsize = markers_cnt * sizeof(*markers); - newsize = oldsize + 2 * sizeof(*markers); - if (!oldsize) - markers = kvmalloc(newsize, GFP_KERNEL); - else - markers = kvrealloc(markers, newsize, GFP_KERNEL); - if (!markers) - goto error; + newsize = (markers_cnt + 2) * sizeof(*markers); + new = kvrealloc(markers, newsize, GFP_KERNEL); + if (!new) + return -ENOMEM; + markers = new; markers[markers_cnt].is_start = 1; markers[markers_cnt].start_address = start; markers[markers_cnt].size = end - start; @@ -312,9 +310,6 @@ static int add_marker(unsigned long start, unsigned long end, const char *name) markers[markers_cnt].name = name; markers_cnt++; return 0; -error: - markers_cnt = 0; - return -ENOMEM; } static int pt_dump_init(void) From 34ab4c75588c07cca12884f2bf6b0347c7a13872 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 23 Oct 2025 22:25:49 +0900 Subject: [PATCH 0526/1075] bfs: Reconstruct file type when loading from disk syzbot is reporting that S_IFMT bits of inode->i_mode can become bogus when the S_IFMT bits of the 32bits "mode" field loaded from disk are corrupted or when the 32bits "attributes" field loaded from disk are corrupted. A documentation says that BFS uses only lower 9 bits of the "mode" field. But I can't find an explicit explanation that the unused upper 23 bits (especially, the S_IFMT bits) are initialized with 0. Therefore, ignore the S_IFMT bits of the "mode" field loaded from disk. Also, verify that the value of the "attributes" field loaded from disk is either BFS_VREG or BFS_VDIR (because BFS supports only regular files and the root directory). Reported-by: syzbot+895c23f6917da440ed0d@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=895c23f6917da440ed0d Signed-off-by: Tetsuo Handa Link: https://patch.msgid.link/fabce673-d5b9-4038-8287-0fd65d80203b@I-love.SAKURA.ne.jp Reviewed-by: Tigran Aivazian Signed-off-by: Christian Brauner --- fs/bfs/inode.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 1d41ce477df5..984b365df046 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -61,7 +61,19 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); + /* + * https://martin.hinner.info/fs/bfs/bfs-structure.html explains that + * BFS in SCO UnixWare environment used only lower 9 bits of di->i_mode + * value. This means that, although bfs_write_inode() saves whole + * inode->i_mode bits (which include S_IFMT bits and S_IS{UID,GID,VTX} + * bits), middle 7 bits of di->i_mode value can be garbage when these + * bits were not saved by bfs_write_inode(). + * Since we can't tell whether middle 7 bits are garbage, use only + * lower 12 bits (i.e. tolerate S_IS{UID,GID,VTX} bits possibly being + * garbage) and reconstruct S_IFMT bits for Linux environment from + * di->i_vtype value. + */ + inode->i_mode = 0x00000FFF & le32_to_cpu(di->i_mode); if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { inode->i_mode |= S_IFDIR; inode->i_op = &bfs_dir_inops; @@ -71,6 +83,11 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; + } else { + brelse(bh); + printf("Unknown vtype=%u %s:%08lx\n", + le32_to_cpu(di->i_vtype), inode->i_sb->s_id, ino); + goto error; } BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); From 514f1dc8f2ca3101e04cdf452e53baca3a76e544 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Oct 2025 17:18:10 +0200 Subject: [PATCH 0527/1075] netfilter: nft_ct: enable labels for get case too conntrack labels can only be set when the conntrack has been created with the "ctlabel" extension. For older iptables (connlabel match), adding an "-m connlabel" rule turns on the ctlabel extension allocation for all future conntrack entries. For nftables, its only enabled for 'ct label set foo', but not for 'ct label foo' (i.e. check). But users could have a ruleset that only checks for presence, and rely on userspace to set a label bit via ctnetlink infrastructure. This doesn't work without adding a dummy 'ct label set' rule. We could also enable extension infra for the first (failing) ctnetlink request, but unlike ruleset we would not be able to disable the extension again. Therefore turn on ctlabel extension allocation if an nftables ruleset checks for a connlabel too. Fixes: 1ad8f48df6f6 ("netfilter: nftables: add connlabel set support") Reported-by: Antonio Ojea Closes: https://lore.kernel.org/netfilter-devel/aPi_VdZpVjWujZ29@strlen.de/ Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d526e69a2a2b..a418eb3d612b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -379,6 +379,14 @@ static bool nft_ct_tmpl_alloc_pcpu(void) } #endif +static void __nft_ct_get_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + if (priv->key == NFT_CT_LABELS) + nf_connlabels_put(ctx->net); +#endif +} + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -413,6 +421,10 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; len = NF_CT_LABELS_MAX_SIZE; + + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; break; #endif case NFT_CT_HELPER: @@ -494,7 +506,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err; } } @@ -502,11 +515,11 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) - return err; + goto err; err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) - return err; + goto err; if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS || @@ -514,6 +527,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, nf_ct_set_acct(ctx->net, true); return 0; +err: + __nft_ct_get_destroy(ctx, priv); + return err; } static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) @@ -626,6 +642,9 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + __nft_ct_get_destroy(ctx, priv); nf_ct_netns_put(ctx->net, ctx->family); } From 8d96dfdcabef00e28f0c851b1502adb679dfc6d9 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 24 Oct 2025 17:54:39 +0200 Subject: [PATCH 0528/1075] netfilter: nft_connlimit: fix possible data race on connection count nft_connlimit_eval() reads priv->list->count to check if the connection limit has been exceeded. This value is being read without a lock and can be modified by a different process. Use READ_ONCE() for correctness. Fixes: df4a90250976 ("netfilter: nf_conncount: merge lookup and add functions") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nft_connlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 92b984fa8175..fc35a11cdca2 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -48,7 +48,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - count = priv->list->count; + count = READ_ONCE(priv->list->count); if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; From 90918e3b6404c2a37837b8f11692471b4c512de2 Mon Sep 17 00:00:00 2001 From: Andrii Melnychenko Date: Fri, 24 Oct 2025 18:22:16 +0200 Subject: [PATCH 0529/1075] netfilter: nft_ct: add seqadj extension for natted connections Sequence adjustment may be required for FTP traffic with PASV/EPSV modes. due to need to re-write packet payload (IP, port) on the ftp control connection. This can require changes to the TCP length and expected seq / ack_seq. The easiest way to reproduce this issue is with PASV mode. Example ruleset: table inet ftp_nat { ct helper ftp_helper { type "ftp" protocol tcp l3proto inet } chain prerouting { type filter hook prerouting priority 0; policy accept; tcp dport 21 ct state new ct helper set "ftp_helper" } } table ip nat { chain prerouting { type nat hook prerouting priority -100; policy accept; tcp dport 21 dnat ip prefix to ip daddr map { 192.168.100.1 : 192.168.13.2/32 } } chain postrouting { type nat hook postrouting priority 100 ; policy accept; tcp sport 21 snat ip prefix to ip saddr map { 192.168.13.2 : 192.168.100.1/32 } } } Note that the ftp helper gets assigned *after* the dnat setup. The inverse (nat after helper assign) is handled by an existing check in nf_nat_setup_info() and will not show the problem. Topoloy: +-------------------+ +----------------------------------+ | FTP: 192.168.13.2 | <-> | NAT: 192.168.13.3, 192.168.100.1 | +-------------------+ +----------------------------------+ | +-----------------------+ | Client: 192.168.100.2 | +-----------------------+ ftp nat changes do not work as expected in this case: Connected to 192.168.100.1. [..] ftp> epsv EPSV/EPRT on IPv4 off. ftp> ls 227 Entering passive mode (192,168,100,1,209,129). 421 Service not available, remote server has closed connection. Kernel logs: Missing nfct_seqadj_ext_add() setup call WARNING: CPU: 1 PID: 0 at net/netfilter/nf_conntrack_seqadj.c:41 [..] __nf_nat_mangle_tcp_packet+0x100/0x160 [nf_nat] nf_nat_ftp+0x142/0x280 [nf_nat_ftp] help+0x4d1/0x880 [nf_conntrack_ftp] nf_confirm+0x122/0x2e0 [nf_conntrack] nf_hook_slow+0x3c/0xb0 .. Fix this by adding the required extension when a conntrack helper is assigned to a connection that has a nat binding. Fixes: 1a64edf54f55 ("netfilter: nft_ct: add helper set support") Signed-off-by: Andrii Melnychenko Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a418eb3d612b..6f2ae7cad731 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -22,6 +22,7 @@ #include #include #include +#include struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -1192,6 +1193,10 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj, if (help) { rcu_assign_pointer(help->helper, to_assign); set_bit(IPS_HELPER_BIT, &ct->status); + + if ((ct->status & IPS_NAT_MASK) && !nfct_seqadj(ct)) + if (!nfct_seqadj_ext_add(ct)) + regs->verdict.code = NF_DROP; } } From 22897e568646de5907d4981eae6cc895be2978d1 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Wed, 29 Oct 2025 16:11:34 +0200 Subject: [PATCH 0530/1075] ASoC: renesas: rz-ssi: Use proper dma_buffer_pos after resume When the driver supports DMA, it enqueues four DMA descriptors per substream before the substream is started. New descriptors are enqueued in the DMA completion callback, and each time a new descriptor is queued, the dma_buffer_pos is incremented. During suspend, the DMA transactions are terminated. There might be cases where the four extra enqueued DMA descriptors are not completed and are instead canceled on suspend. However, the cancel operation does not take into account that the dma_buffer_pos was already incremented. Previously, the suspend code reinitialized dma_buffer_pos to zero, but this is not always correct. To avoid losing any audio periods during suspend/resume and to prevent clip sound, save the completed DMA buffer position in the DMA callback and reinitialize dma_buffer_pos on resume. Cc: stable@vger.kernel.org Fixes: 1fc778f7c833a ("ASoC: renesas: rz-ssi: Add suspend to RAM support") Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20251029141134.2556926-3-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/renesas/rz-ssi.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/sound/soc/renesas/rz-ssi.c b/sound/soc/renesas/rz-ssi.c index e00940814157..81b883e8ac92 100644 --- a/sound/soc/renesas/rz-ssi.c +++ b/sound/soc/renesas/rz-ssi.c @@ -85,6 +85,7 @@ struct rz_ssi_stream { struct snd_pcm_substream *substream; int fifo_sample_size; /* sample capacity of SSI FIFO */ int dma_buffer_pos; /* The address for the next DMA descriptor */ + int completed_dma_buf_pos; /* The address of the last completed DMA descriptor. */ int period_counter; /* for keeping track of periods transferred */ int sample_width; int buffer_pos; /* current frame position in the buffer */ @@ -215,6 +216,7 @@ static void rz_ssi_stream_init(struct rz_ssi_stream *strm, rz_ssi_set_substream(strm, substream); strm->sample_width = samples_to_bytes(runtime, 1); strm->dma_buffer_pos = 0; + strm->completed_dma_buf_pos = 0; strm->period_counter = 0; strm->buffer_pos = 0; @@ -437,6 +439,10 @@ static void rz_ssi_pointer_update(struct rz_ssi_stream *strm, int frames) snd_pcm_period_elapsed(strm->substream); strm->period_counter = current_period; } + + strm->completed_dma_buf_pos += runtime->period_size; + if (strm->completed_dma_buf_pos >= runtime->buffer_size) + strm->completed_dma_buf_pos = 0; } static int rz_ssi_pio_recv(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) @@ -778,10 +784,14 @@ static int rz_ssi_dma_request(struct rz_ssi_priv *ssi, struct device *dev) return -ENODEV; } -static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) +static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi, struct rz_ssi_stream *strm) { + struct snd_pcm_substream *substream = strm->substream; + struct snd_pcm_runtime *runtime = substream->runtime; int ret; + strm->dma_buffer_pos = strm->completed_dma_buf_pos + runtime->period_size; + if (rz_ssi_is_stream_running(&ssi->playback) || rz_ssi_is_stream_running(&ssi->capture)) return 0; @@ -794,16 +804,6 @@ static int rz_ssi_trigger_resume(struct rz_ssi_priv *ssi) ssi->hw_params_cache.channels); } -static void rz_ssi_streams_suspend(struct rz_ssi_priv *ssi) -{ - if (rz_ssi_is_stream_running(&ssi->playback) || - rz_ssi_is_stream_running(&ssi->capture)) - return; - - ssi->playback.dma_buffer_pos = 0; - ssi->capture.dma_buffer_pos = 0; -} - static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai) { @@ -813,7 +813,7 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, switch (cmd) { case SNDRV_PCM_TRIGGER_RESUME: - ret = rz_ssi_trigger_resume(ssi); + ret = rz_ssi_trigger_resume(ssi, strm); if (ret) return ret; @@ -852,7 +852,6 @@ static int rz_ssi_dai_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_SUSPEND: rz_ssi_stop(ssi, strm); - rz_ssi_streams_suspend(ssi); break; case SNDRV_PCM_TRIGGER_STOP: From 5e5c8aa73d99f1daa9f2ec1474b7fc1a6952764b Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Wed, 29 Oct 2025 14:46:36 +0000 Subject: [PATCH 0531/1075] ASoC: dt-bindings: pm4125-sdw: correct number of soundwire ports For some reason we ended up limiting the number of soundwire ports to 2 in the bindings, the actual codec supports 4 rx and 5 tx ports. Fixes: 88d0d17192c5 ("ASoC: dt-bindings: add bindings for pm4125 audio codec") Signed-off-by: Srinivas Kandagatla Link: https://patch.msgid.link/20251029144636.357203-1-srinivas.kandagatla@oss.qualcomm.com Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml index 23624f32ac30..769e4cb5b99b 100644 --- a/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,pm4125-sdw.yaml @@ -32,7 +32,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 4 items: enum: [1, 2, 3, 4] @@ -48,7 +48,7 @@ properties: $ref: /schemas/types.yaml#/definitions/uint32-array minItems: 2 - maxItems: 2 + maxItems: 5 items: enum: [1, 2, 3, 4, 5] From 9db8d46712d274a27d1d22c38e70211f20d508c2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Oct 2025 15:23:36 +0200 Subject: [PATCH 0532/1075] mnt: Remove dead code which might prevent from building Clang, in particular, is not happy about dead code: fs/namespace.c:135:37: error: unused function 'node_to_mnt_ns' [-Werror,-Wunused-function] 135 | static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) | ^~~~~~~~~~~~~~ 1 error generated. Remove a leftover from the previous cleanup. Fixes: 7d7d16498958 ("mnt: support ns lookup") Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20251024132336.1666382-1-andriy.shevchenko@linux.intel.com Signed-off-by: Christian Brauner --- fs/namespace.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5b5ab2ae238b..cc6e00e72437 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -132,16 +132,6 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) -{ - struct ns_common *ns; - - if (!node) - return NULL; - ns = rb_entry(node, struct ns_common, ns_tree_node); - return container_of(ns, struct mnt_namespace, ns); -} - static void mnt_ns_release(struct mnt_namespace *ns) { /* keep alive for {list,stat}mount() */ From 9222582ec524707fbb9d076febead5b6a07611ed Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 28 Oct 2025 14:07:44 +0800 Subject: [PATCH 0533/1075] Revert "wifi: ath12k: Fix missing station power save configuration" This reverts commit 4b66d18918f8e4d85e51974a9e3ce9abad5c7c3d. In [1], Ross Brown reports poor performance of WCN7850 after enabling power save. Temporarily revert the fix; it will be re-enabled once the issue is resolved. Tested-on: WCN7850 hw2.0 PCI WLAN.IOE_HMT.1.1-00011-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1 Fixes: 4b66d18918f8 ("wifi: ath12k: Fix missing station power save configuration") Reported-by: Ross Brown Closes: https://lore.kernel.org/all/CAMn66qZENLhDOcVJuwUZ3ir89PVtVnQRq9DkV5xjJn1p6BKB9w@mail.gmail.com/ # [1] Signed-off-by: Miaoqing Pan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251028060744.897198-1-miaoqing.pan@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 122 ++++++++++++-------------- 1 file changed, 55 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index eacab798630a..db351c922018 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4064,68 +4064,12 @@ static int ath12k_mac_fils_discovery(struct ath12k_link_vif *arvif, return ret; } -static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) -{ - struct ath12k *ar = arvif->ar; - struct ieee80211_vif *vif = arvif->ahvif->vif; - struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; - enum wmi_sta_powersave_param param; - struct ieee80211_bss_conf *info; - enum wmi_sta_ps_mode psmode; - int ret; - int timeout; - bool enable_ps; - - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - - if (vif->type != NL80211_IFTYPE_STATION) - return; - - enable_ps = arvif->ahvif->ps; - if (enable_ps) { - psmode = WMI_STA_PS_MODE_ENABLED; - param = WMI_STA_PS_PARAM_INACTIVITY_TIME; - - timeout = conf->dynamic_ps_timeout; - if (timeout == 0) { - info = ath12k_mac_get_link_bss_conf(arvif); - if (!info) { - ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", - vif->addr, arvif->link_id); - return; - } - - /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; - } - - ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, - timeout); - if (ret) { - ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", - arvif->vdev_id, ret); - return; - } - } else { - psmode = WMI_STA_PS_MODE_DISABLED; - } - - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", - arvif->vdev_id, psmode ? "enable" : "disable"); - - ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); - if (ret) - ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", - psmode, arvif->vdev_id, ret); -} - static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 changed) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; - struct ieee80211_vif_cfg *vif_cfg; struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4189,24 +4133,61 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, } } } +} - if (changed & BSS_CHANGED_PS) { - links = ahvif->links_map; - vif_cfg = &vif->cfg; +static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) +{ + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; + enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; + enum wmi_sta_ps_mode psmode; + int ret; + int timeout; + bool enable_ps; - for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || !arvif->ar) - continue; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ar = arvif->ar; + if (vif->type != NL80211_IFTYPE_STATION) + return; - if (ar->ab->hw_params->supports_sta_ps) { - ahvif->ps = vif_cfg->ps; - ath12k_mac_vif_setup_ps(arvif); + enable_ps = arvif->ahvif->ps; + if (enable_ps) { + psmode = WMI_STA_PS_MODE_ENABLED; + param = WMI_STA_PS_PARAM_INACTIVITY_TIME; + + timeout = conf->dynamic_ps_timeout; + if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; } + + /* firmware doesn't like 0 */ + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; } + + ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, + timeout); + if (ret) { + ath12k_warn(ar->ab, "failed to set inactivity time for vdev %d: %i\n", + arvif->vdev_id, ret); + return; + } + } else { + psmode = WMI_STA_PS_MODE_DISABLED; } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %d psmode %s\n", + arvif->vdev_id, psmode ? "enable" : "disable"); + + ret = ath12k_wmi_pdev_set_ps_mode(ar, arvif->vdev_id, psmode); + if (ret) + ath12k_warn(ar->ab, "failed to set sta power save mode %d for vdev %d: %d\n", + psmode, arvif->vdev_id, ret); } static bool ath12k_mac_supports_tpc(struct ath12k *ar, struct ath12k_vif *ahvif, @@ -4228,6 +4209,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_vif_cfg *vif_cfg = &vif->cfg; struct cfg80211_chan_def def; u32 param_id, param_value; enum nl80211_band band; @@ -4514,6 +4496,12 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, } ath12k_mac_fils_discovery(arvif, info); + + if (changed & BSS_CHANGED_PS && + ar->ab->hw_params->supports_sta_ps) { + ahvif->ps = vif_cfg->ps; + ath12k_mac_vif_setup_ps(arvif); + } } static struct ath12k_vif_cache *ath12k_ahvif_get_link_cache(struct ath12k_vif *ahvif, From f4fa7c25f632cd925352b4d46f245653a23b1d1a Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Wed, 29 Oct 2025 14:08:43 +0100 Subject: [PATCH 0534/1075] sched_ext: Fix use of uninitialized variable in scx_bpf_cpuperf_set() scx_bpf_cpuperf_set() has a typo where it dereferences the local variable @sch, instead of the global @scx_root pointer. Fix by dereferencing the correct variable. Fixes: 956f2b11a8a4f ("sched_ext: Drop kf_cpu_valid()") Signed-off-by: Andrea Righi Reviewed-by: Christian Loehle Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index ecb251e883ea..1a019a7728fb 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -6401,7 +6401,7 @@ __bpf_kfunc void scx_bpf_cpuperf_set(s32 cpu, u32 perf) guard(rcu)(); - sch = rcu_dereference(sch); + sch = rcu_dereference(scx_root); if (unlikely(!sch)) return; From edce042da7984586ea5c7ed18ea5f58002afb969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 27 Oct 2025 14:12:28 +0100 Subject: [PATCH 0535/1075] drm/xe: Fix uninitialized return value from xe_validation_guard() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit the DEFINE_CLASS() macro creates an inline function and the init args are passed down to it; since _ret is passed as an int, whatever value is set inside the function is not visible to the caller. Pass _ret as a pointer so its value propagates to the caller. Fixes: c460bc2311df ("drm/xe: Introduce an xe_validation wrapper around drm_exec") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/6220 Cc: Maarten Lankhorst Cc: Matthew Brost Cc: intel-xe@lists.freedesktop.org Signed-off-by: Thomas Hellström Reviewed-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Reviewed-by: Matthew Brost Link: https://patch.msgid.link/20251027131228.12098-1-thomas.hellstrom@linux.intel.com (cherry picked from commit fcb8c304f4673747d535c74b340b5b8a4823727b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_validation.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h index fec331d791e7..b2d09c596714 100644 --- a/drivers/gpu/drm/xe/xe_validation.h +++ b/drivers/gpu/drm/xe/xe_validation.h @@ -166,10 +166,10 @@ xe_validation_device_init(struct xe_validation_device *val) */ DEFINE_CLASS(xe_validation, struct xe_validation_ctx *, if (_T) xe_validation_ctx_fini(_T);, - ({_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); - _ret ? NULL : _ctx; }), + ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags); + *_ret ? NULL : _ctx; }), struct xe_validation_ctx *_ctx, struct xe_validation_device *_val, - struct drm_exec *_exec, const struct xe_val_flags _flags, int _ret); + struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret); static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) {return *_T; } #define class_xe_validation_is_conditional true @@ -186,7 +186,7 @@ static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T) * exhaustive eviction. */ #define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \ - scoped_guard(xe_validation, _ctx, _val, _exec, _flags, _ret) \ + scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \ drm_exec_until_all_locked(_exec) #endif From 35e4a69b2003f20a69e7d19ae96ab1eef1aa8e8d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 28 Oct 2025 21:52:31 +0100 Subject: [PATCH 0536/1075] PM: sleep: Allow pm_restrict_gfp_mask() stacking Allow pm_restrict_gfp_mask() to be called many times in a row to avoid issues with calling dpm_suspend_start() when the GFP mask has been already restricted. Only the first invocation of pm_restrict_gfp_mask() will actually restrict the GFP mask and the subsequent calls will warn if there is a mismatch between the expected allowed GFP mask and the actual one. Moreover, if pm_restrict_gfp_mask() is called many times in a row, pm_restore_gfp_mask() needs to be called matching number of times in a row to actually restore the GFP mask. Calling it when the GFP mask has not been restricted will cause it to warn. This is necessary for the GFP mask restriction starting in hibernation_snapshot() to continue throughout the entire hibernation flow until it completes or it is aborted (either by a wakeup event or by an error). Fixes: 449c9c02537a1 ("PM: hibernate: Restrict GFP mask in hibernation_snapshot()") Fixes: 469d80a3712c ("PM: hibernate: Fix hybrid-sleep") Reported-by: Askar Safin Closes: https://lore.kernel.org/linux-pm/20251025050812.421905-1-safinaskar@gmail.com/ Link: https://lore.kernel.org/linux-pm/20251028111730.2261404-1-safinaskar@gmail.com/ Signed-off-by: Rafael J. Wysocki Reviewed-by: Mario Limonciello (AMD) Tested-by: Mario Limonciello (AMD) Cc: 6.16+ # 6.16+ Link: https://patch.msgid.link/5935682.DvuYhMxLoT@rafael.j.wysocki --- kernel/power/hibernate.c | 4 ---- kernel/power/main.c | 22 +++++++++++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 14e85ff23551..53166ef86ba4 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -706,7 +706,6 @@ static void power_down(void) #ifdef CONFIG_SUSPEND if (hibernation_mode == HIBERNATION_SUSPEND) { - pm_restore_gfp_mask(); error = suspend_devices_and_enter(mem_sleep_current); if (!error) goto exit; @@ -746,9 +745,6 @@ static void power_down(void) cpu_relax(); exit: - /* Match the pm_restore_gfp_mask() call in hibernate(). */ - pm_restrict_gfp_mask(); - /* Restore swap signature. */ error = swsusp_unmark(); if (error) diff --git a/kernel/power/main.c b/kernel/power/main.c index 3cf2d7e72567..549f51ca3a1e 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -31,23 +31,35 @@ * held, unless the suspend/hibernate code is guaranteed not to run in parallel * with that modification). */ +static unsigned int saved_gfp_count; static gfp_t saved_gfp_mask; void pm_restore_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - if (saved_gfp_mask) { - gfp_allowed_mask = saved_gfp_mask; - saved_gfp_mask = 0; - } + + if (WARN_ON(!saved_gfp_count) || --saved_gfp_count) + return; + + gfp_allowed_mask = saved_gfp_mask; + saved_gfp_mask = 0; + + pm_pr_dbg("GFP mask restored\n"); } void pm_restrict_gfp_mask(void) { WARN_ON(!mutex_is_locked(&system_transition_mutex)); - WARN_ON(saved_gfp_mask); + + if (saved_gfp_count++) { + WARN_ON((saved_gfp_mask & ~(__GFP_IO | __GFP_FS)) != gfp_allowed_mask); + return; + } + saved_gfp_mask = gfp_allowed_mask; gfp_allowed_mask &= ~(__GFP_IO | __GFP_FS); + + pm_pr_dbg("GFP mask restricted\n"); } unsigned int lock_system_sleep(void) From b3fbda1a630a9439c885b2a5dc5230cc49a87e9e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:37 -0700 Subject: [PATCH 0537/1075] drm/xe: Do not wake device during a GT reset Waking the device during a GT reset can lead to unintended memory allocation, which is not allowed since GT resets occur in the reclaim path. Prevent this by holding a PM reference while a reset is in flight. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-3-matthew.brost@intel.com (cherry picked from commit 480b358e7d8ef69fd8f1b0cad6e07c7d70a36ee4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 3e0ad7e5b5df..6d3db5e55d98 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -813,12 +813,16 @@ static int gt_reset(struct xe_gt *gt) unsigned int fw_ref; int err; - if (xe_device_wedged(gt_to_xe(gt))) - return -ECANCELED; + if (xe_device_wedged(gt_to_xe(gt))) { + err = -ECANCELED; + goto err_pm_put; + } /* We only support GT resets with GuC submission */ - if (!xe_device_uc_enabled(gt_to_xe(gt))) - return -ENODEV; + if (!xe_device_uc_enabled(gt_to_xe(gt))) { + err = -ENODEV; + goto err_pm_put; + } xe_gt_info(gt, "reset started\n"); @@ -826,8 +830,6 @@ static int gt_reset(struct xe_gt *gt) if (!err) xe_gt_warn(gt, "reset block failed to get lifted"); - xe_pm_runtime_get(gt_to_xe(gt)); - if (xe_fault_inject_gt_reset()) { err = -ECANCELED; goto err_fail; @@ -874,6 +876,7 @@ static int gt_reset(struct xe_gt *gt) xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err)); xe_device_declare_wedged(gt_to_xe(gt)); +err_pm_put: xe_pm_runtime_put(gt_to_xe(gt)); return err; @@ -895,7 +898,9 @@ void xe_gt_reset_async(struct xe_gt *gt) return; xe_gt_info(gt, "reset queued\n"); - queue_work(gt->ordered_wq, >->reset.worker); + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + if (!queue_work(gt->ordered_wq, >->reset.worker)) + xe_pm_runtime_put(gt_to_xe(gt)); } void xe_gt_suspend_prepare(struct xe_gt *gt) From 056d76f7cb9fa68c8bbf85b4055aeb84af6139d5 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 1 Oct 2025 01:59:42 +0200 Subject: [PATCH 0538/1075] drm/panel: sitronix-st7789v: fix sync flags for t28cp45tn89 I planned to set the polarity of horizontal and vertical sync, but accidentally described vertical sync twice with different polarity instead. Note, that there is no functional change, because the driver only makes use of DRM_MODE_FLAG_P[HV]SYNC to divert from the default active-low polarity. Reported-by: Laurent Pinchart Closes: https://lore.kernel.org/all/20250923132616.GH20765@pendragon.ideasonboard.com/ Fixes: a411558cc143 ("drm/panel: sitronix-st7789v: add Inanbo T28CP45TN89 support") Reviewed-by: Laurent Pinchart Reviewed-by: Marek Vasut Signed-off-by: Sebastian Reichel Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20251001-t28cp45tn89-fix-v2-1-67fe8e3046ca@collabora.com --- drivers/gpu/drm/panel/panel-sitronix-st7789v.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c index 04d91929eedd..d5f821d6b23c 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c @@ -249,6 +249,11 @@ static const struct drm_display_mode default_mode = { .flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC, }; +/* + * The mode data for this panel has been reverse engineered without access + * to the panel datasheet / manual. Using DRM_MODE_FLAG_PHSYNC like all + * other panels results in garbage data on the display. + */ static const struct drm_display_mode t28cp45tn89_mode = { .clock = 6008, .hdisplay = 240, @@ -261,7 +266,7 @@ static const struct drm_display_mode t28cp45tn89_mode = { .vtotal = 320 + 8 + 4 + 4, .width_mm = 43, .height_mm = 57, - .flags = DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC, + .flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC, }; static const struct drm_display_mode et028013dma_mode = { From d8eb00bbc628c8916ab0da2bb80059d48aa4512e Mon Sep 17 00:00:00 2001 From: Sebastian Fleer Date: Wed, 29 Oct 2025 13:35:28 +0100 Subject: [PATCH 0539/1075] drm/panel: kingdisplay-kd097d04: Disable EoTp Since commit d97e71e44937 ("drm/bridge: synopsys: dw-mipi-dsi: enable EoTp by default") panel output on an Acer Chromebook Tab 10 (google-dru) is corrupted. The tablet I use is equipped with a kingdisplay-kd097d04 panel, disabling EoTp restores the correct functionality. Fixes: 2a994cbed6b2 ("drm/panel: Add Kingdisplay KD097D04 panel driver") Suggested-by: Jens Reidel Signed-off-by: Sebastian Fleer Reviewed-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20251029124007.232333-1-dev@dwurp.de --- drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index 2fc7b0779b37..893af9b16756 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -359,7 +359,7 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) dsi->lanes = 4; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET; kingdisplay = devm_drm_panel_alloc(&dsi->dev, __typeof(*kingdisplay), base, &kingdisplay_panel_funcs, From f3903ec76ae6afcdba0347681d1dda005fb145cd Mon Sep 17 00:00:00 2001 From: Rae Moar Date: Tue, 28 Oct 2025 19:40:10 +0000 Subject: [PATCH 0540/1075] MAINTAINERS: Update KUnit email address for Rae Moar Update Rae's email address for the KUnit entry. Also add an entry to .mailmap to map former google email to current gmail address. Signed-off-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Shuah Khan --- .mailmap | 1 + MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index d2edd256b19d..2fcf7e4a5cfd 100644 --- a/.mailmap +++ b/.mailmap @@ -642,6 +642,7 @@ Qais Yousef Quentin Monnet Quentin Monnet Quentin Perret +Rae Moar Rafael J. Wysocki Rajeev Nandan Rajendra Nayak diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..eefcff990987 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13601,7 +13601,7 @@ F: fs/smb/server/ KERNEL UNIT TESTING FRAMEWORK (KUnit) M: Brendan Higgins M: David Gow -R: Rae Moar +R: Rae Moar L: linux-kselftest@vger.kernel.org L: kunit-dev@googlegroups.com S: Maintained From a6f0459aadf1b41a9b9fae02006b1db024d60856 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 12:57:59 +0100 Subject: [PATCH 0541/1075] mptcp: fix subflow rcvbuf adjust The mptcp PM can add subflow to the conn_list before tcp_init_transfer(). Calling tcp_rcvbuf_grow() on such subflow is not correct as later init will overwrite the update. Fix the issue calling tcp_rcvbuf_grow() only after init buffer initialization. Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Signed-off-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-1-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0292162a14ee..a8a3bdf95543 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2051,6 +2051,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) msk->rcvq_space.space = msk->rcvq_space.copied; if (mptcp_rcvbuf_grow(sk)) { + int copied = msk->rcvq_space.copied; /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to @@ -2063,8 +2064,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); - tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied; - tcp_rcvbuf_grow(ssk); + /* subflows can be added before tcp_init_transfer() */ + if (tcp_sk(ssk)->rcvq_space.space) { + tcp_sk(ssk)->rcvq_space.space = copied; + tcp_rcvbuf_grow(ssk); + } unlock_sock_fast(ssk, slow); } } From 24990d89c23de4dbef6b0b3d58383cafefdd6983 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:00 +0100 Subject: [PATCH 0542/1075] trace: tcp: add three metrics to trace_tcp_rcvbuf_grow() While chasing yet another receive autotuning bug, I found useful to add rcv_ssthresh, window_clamp and rcv_wnd. tcp_stream 40597 [068] 2172.978198: tcp:tcp_rcvbuf_grow: time=50307 rtt_us=50179 copied=77824 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=110592 tcp_stream 40597 [068] 2173.028528: tcp:tcp_rcvbuf_grow: time=50336 rtt_us=50206 copied=110592 inq=0 space=77824 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=328658 window_clamp=435813 rcv_wnd=331776 tcp_stream 40597 [068] 2173.078830: tcp:tcp_rcvbuf_grow: time=50305 rtt_us=50070 copied=270336 inq=0 space=110592 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=431159 window_clamp=435813 rcv_wnd=434176 tcp_stream 40597 [068] 2173.129137: tcp:tcp_rcvbuf_grow: time=50313 rtt_us=50118 copied=434176 inq=0 space=270336 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=1299511 window_clamp=2102611 rcv_wnd=1302528 tcp_stream 40597 [068] 2173.179451: tcp:tcp_rcvbuf_grow: time=50318 rtt_us=50041 copied=1019904 inq=0 space=434176 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=2087445 window_clamp=2102611 rcv_wnd=2088960 Signed-off-by: Eric Dumazet Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-2-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 9d2c36c6a0ed..6757233bd064 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) + __field(__u32, rcv_ssthresh) + __field(__u32, window_clamp) + __field(__u32, rcv_wnd) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) @@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; + __entry->rcv_ssthresh = tp->rcv_ssthresh; + __entry->window_clamp = tp->window_clamp; + __entry->rcv_wnd = tp->rcv_wnd; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow, ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, + __entry->rcv_ssthresh, __entry->window_clamp, + __entry->rcv_wnd, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, From b1e014a1f3275a6f3d0f2b30b8117447fc3915f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:01 +0100 Subject: [PATCH 0543/1075] tcp: add newval parameter to tcp_rcvbuf_grow() This patch has no functional change, and prepares the following one. tcp_rcvbuf_grow() will need to have access to tp->rcvq_space.space old and new values. Change mptcp_rcvbuf_grow() in a similar way. Signed-off-by: Eric Dumazet [ Moved 'oldval' declaration to the next patch to avoid warnings at build time. ] Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-3-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 14 +++++++------- net/mptcp/protocol.c | 20 ++++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526a..ab20f549b8f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); -void tcp_rcvbuf_grow(struct sock *sk); +void tcp_rcvbuf_grow(struct sock *sk, u32 newval); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31ea5af49f2d..cb4e07f84ae2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -891,18 +891,20 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, } } -void tcp_rcvbuf_grow(struct sock *sk) +void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + + tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; /* slow start: allow the sender to double its rate. */ - rcvwin = tp->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; @@ -943,9 +945,7 @@ void tcp_rcv_space_adjust(struct sock *sk) trace_tcp_rcvbuf_grow(sk, time); - tp->rcvq_space.space = copied; - - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, copied); new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -5270,7 +5270,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) } /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, tp->rcvq_space.space); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a8a3bdf95543..052a0c62023f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -194,17 +194,18 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, * - mptcp does not maintain a msk-level window clamp * - returns true when the receive buffer is actually updated */ -static bool mptcp_rcvbuf_grow(struct sock *sk) +static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; - rcvwin = msk->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; @@ -334,7 +335,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) skb_set_owner_r(skb, sk); /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - mptcp_rcvbuf_grow(sk); + mptcp_rcvbuf_grow(sk, msk->rcvq_space.space); } static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, @@ -2049,10 +2050,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - msk->rcvq_space.space = msk->rcvq_space.copied; - if (mptcp_rcvbuf_grow(sk)) { - int copied = msk->rcvq_space.copied; - + if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2065,10 +2063,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); /* subflows can be added before tcp_init_transfer() */ - if (tcp_sk(ssk)->rcvq_space.space) { - tcp_sk(ssk)->rcvq_space.space = copied; - tcp_rcvbuf_grow(ssk); - } + if (tcp_sk(ssk)->rcvq_space.space) + tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); unlock_sock_fast(ssk, slow); } } From aa251c84636c326471ca9d53723816ba8fffe2bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:02 +0100 Subject: [PATCH 0544/1075] tcp: fix too slow tcp_rcvbuf_grow() action While the blamed commits apparently avoided an overshoot, they also limited how fast a sender can increase BDP at each RTT. This is not exactly a revert, we do not add the 16 * tp->advmss cushion we had, and we are keeping the out_of_order_queue contribution. Do the same in mptcp_rcvbuf_grow(). Tested: emulated 50ms rtt (tcp_stream --tcp-tx-delay 50000), cubic 20 second flow. net.ipv4.tcp_rmem set to "4096 131072 67000000" perf record -a -e tcp:tcp_rcvbuf_grow sleep 20 perf script Before: We can see we fail to roughly double RWIN at each RTT. Sender is RWIN limited while CWND is ramping up (before getting tcp_wmem limited). tcp_stream 33793 [010] 825.717525: tcp:tcp_rcvbuf_grow: time=100869 rtt_us=50428 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.768966: tcp:tcp_rcvbuf_grow: time=51447 rtt_us=50362 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.821539: tcp:tcp_rcvbuf_grow: time=52577 rtt_us=50243 copied=114688 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=201096 rcv_ssthresh=167377 window_clamp=172031 rcv_wnd=167936 tcp_stream 33793 [010] 825.871781: tcp:tcp_rcvbuf_grow: time=50248 rtt_us=50237 copied=167936 inq=0 space=114688 ooo=0 scaling_ratio=219 rcvbuf=268129 rcv_ssthresh=224722 window_clamp=229375 rcv_wnd=225280 tcp_stream 33793 [010] 825.922475: tcp:tcp_rcvbuf_grow: time=50698 rtt_us=50183 copied=241664 inq=0 space=167936 ooo=0 scaling_ratio=219 rcvbuf=392617 rcv_ssthresh=331217 window_clamp=335871 rcv_wnd=323584 tcp_stream 33793 [010] 825.973326: tcp:tcp_rcvbuf_grow: time=50855 rtt_us=50213 copied=339968 inq=0 space=241664 ooo=0 scaling_ratio=219 rcvbuf=564986 rcv_ssthresh=478674 window_clamp=483327 rcv_wnd=462848 tcp_stream 33793 [010] 826.023970: tcp:tcp_rcvbuf_grow: time=50647 rtt_us=50248 copied=491520 inq=0 space=339968 ooo=0 scaling_ratio=219 rcvbuf=794811 rcv_ssthresh=671778 window_clamp=679935 rcv_wnd=651264 tcp_stream 33793 [010] 826.074612: tcp:tcp_rcvbuf_grow: time=50648 rtt_us=50227 copied=700416 inq=0 space=491520 ooo=0 scaling_ratio=219 rcvbuf=1149124 rcv_ssthresh=974881 window_clamp=983039 rcv_wnd=942080 tcp_stream 33793 [010] 826.125452: tcp:tcp_rcvbuf_grow: time=50845 rtt_us=50225 copied=987136 inq=8192 space=700416 ooo=0 scaling_ratio=219 rcvbuf=1637502 rcv_ssthresh=1392674 window_clamp=1400831 rcv_wnd=1339392 tcp_stream 33793 [010] 826.175698: tcp:tcp_rcvbuf_grow: time=50250 rtt_us=50198 copied=1347584 inq=0 space=978944 ooo=0 scaling_ratio=219 rcvbuf=2288672 rcv_ssthresh=1949729 window_clamp=1957887 rcv_wnd=1945600 tcp_stream 33793 [010] 826.225947: tcp:tcp_rcvbuf_grow: time=50252 rtt_us=50240 copied=1945600 inq=0 space=1347584 ooo=0 scaling_ratio=219 rcvbuf=3150516 rcv_ssthresh=2687010 window_clamp=2695167 rcv_wnd=2691072 tcp_stream 33793 [010] 826.276175: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50224 copied=2691072 inq=0 space=1945600 ooo=0 scaling_ratio=219 rcvbuf=4548617 rcv_ssthresh=3883041 window_clamp=3891199 rcv_wnd=3887104 tcp_stream 33793 [010] 826.326403: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50229 copied=3887104 inq=0 space=2691072 ooo=0 scaling_ratio=219 rcvbuf=6291456 rcv_ssthresh=5370482 window_clamp=5382144 rcv_wnd=5373952 tcp_stream 33793 [010] 826.376723: tcp:tcp_rcvbuf_grow: time=50323 rtt_us=50218 copied=5373952 inq=0 space=3887104 ooo=0 scaling_ratio=219 rcvbuf=9087658 rcv_ssthresh=7755537 window_clamp=7774207 rcv_wnd=7757824 tcp_stream 33793 [010] 826.426991: tcp:tcp_rcvbuf_grow: time=50274 rtt_us=50196 copied=7757824 inq=180224 space=5373952 ooo=0 scaling_ratio=219 rcvbuf=12563759 rcv_ssthresh=10729233 window_clamp=10747903 rcv_wnd=10575872 tcp_stream 33793 [010] 826.477229: tcp:tcp_rcvbuf_grow: time=50241 rtt_us=50078 copied=10731520 inq=180224 space=7577600 ooo=0 scaling_ratio=219 rcvbuf=17715667 rcv_ssthresh=15136529 window_clamp=15155199 rcv_wnd=14983168 tcp_stream 33793 [010] 826.527482: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50153 copied=15138816 inq=360448 space=10551296 ooo=0 scaling_ratio=219 rcvbuf=24667870 rcv_ssthresh=21073410 window_clamp=21102591 rcv_wnd=20766720 tcp_stream 33793 [010] 826.577712: tcp:tcp_rcvbuf_grow: time=50234 rtt_us=50228 copied=21073920 inq=0 space=14778368 ooo=0 scaling_ratio=219 rcvbuf=34550339 rcv_ssthresh=29517041 window_clamp=29556735 rcv_wnd=29519872 tcp_stream 33793 [010] 826.627982: tcp:tcp_rcvbuf_grow: time=50275 rtt_us=50220 copied=29519872 inq=540672 space=21073920 ooo=0 scaling_ratio=219 rcvbuf=49268707 rcv_ssthresh=42090625 window_clamp=42147839 rcv_wnd=41627648 tcp_stream 33793 [010] 826.678274: tcp:tcp_rcvbuf_grow: time=50296 rtt_us=50185 copied=42053632 inq=761856 space=28979200 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57238168 window_clamp=57316406 rcv_wnd=56606720 tcp_stream 33793 [010] 826.728627: tcp:tcp_rcvbuf_grow: time=50357 rtt_us=50128 copied=43913216 inq=851968 space=41291776 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56524800 tcp_stream 33793 [010] 827.131364: tcp:tcp_rcvbuf_grow: time=50239 rtt_us=50127 copied=43843584 inq=655360 space=43061248 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56696832 tcp_stream 33793 [010] 827.181613: tcp:tcp_rcvbuf_grow: time=50254 rtt_us=50115 copied=43843584 inq=524288 space=43188224 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56807424 tcp_stream 33793 [010] 828.339635: tcp:tcp_rcvbuf_grow: time=50283 rtt_us=50110 copied=43843584 inq=458752 space=43319296 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56864768 tcp_stream 33793 [010] 828.440350: tcp:tcp_rcvbuf_grow: time=50404 rtt_us=50099 copied=43843584 inq=393216 space=43384832 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56922112 tcp_stream 33793 [010] 829.195106: tcp:tcp_rcvbuf_grow: time=50154 rtt_us=50077 copied=43843584 inq=196608 space=43450368 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=57090048 After: It takes few steps to increase RWIN. Sender is no longer RWIN limited. tcp_stream 50826 [010] 935.634212: tcp:tcp_rcvbuf_grow: time=100788 rtt_us=50315 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 50826 [010] 935.685642: tcp:tcp_rcvbuf_grow: time=51437 rtt_us=50361 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=160875 rcv_ssthresh=132969 window_clamp=137623 rcv_wnd=131072 tcp_stream 50826 [010] 935.738299: tcp:tcp_rcvbuf_grow: time=52660 rtt_us=50256 copied=139264 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=502741 rcv_ssthresh=411497 window_clamp=430079 rcv_wnd=413696 tcp_stream 50826 [010] 935.788544: tcp:tcp_rcvbuf_grow: time=50249 rtt_us=50233 copied=307200 inq=0 space=139264 ooo=0 scaling_ratio=219 rcvbuf=728690 rcv_ssthresh=618717 window_clamp=623371 rcv_wnd=618496 tcp_stream 50826 [010] 935.838796: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50202 copied=618496 inq=0 space=307200 ooo=0 scaling_ratio=219 rcvbuf=2450338 rcv_ssthresh=1855709 window_clamp=2096187 rcv_wnd=1859584 tcp_stream 50826 [010] 935.889140: tcp:tcp_rcvbuf_grow: time=50347 rtt_us=50166 copied=1261568 inq=0 space=618496 ooo=0 scaling_ratio=219 rcvbuf=4376503 rcv_ssthresh=3725291 window_clamp=3743961 rcv_wnd=3706880 tcp_stream 50826 [010] 935.939435: tcp:tcp_rcvbuf_grow: time=50300 rtt_us=50185 copied=2478080 inq=24576 space=1261568 ooo=0 scaling_ratio=219 rcvbuf=9082648 rcv_ssthresh=7733731 window_clamp=7769921 rcv_wnd=7692288 tcp_stream 50826 [010] 935.989681: tcp:tcp_rcvbuf_grow: time=50251 rtt_us=50221 copied=4915200 inq=114688 space=2453504 ooo=0 scaling_ratio=219 rcvbuf=16574936 rcv_ssthresh=14108110 window_clamp=14179339 rcv_wnd=14024704 tcp_stream 50826 [010] 936.039967: tcp:tcp_rcvbuf_grow: time=50289 rtt_us=50279 copied=9830400 inq=114688 space=4800512 ooo=0 scaling_ratio=219 rcvbuf=32695050 rcv_ssthresh=27896187 window_clamp=27969593 rcv_wnd=27815936 tcp_stream 50826 [010] 936.090172: tcp:tcp_rcvbuf_grow: time=50211 rtt_us=50200 copied=19841024 inq=114688 space=9715712 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.140430: tcp:tcp_rcvbuf_grow: time=50262 rtt_us=50197 copied=39501824 inq=114688 space=19726336 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.190527: tcp:tcp_rcvbuf_grow: time=50101 rtt_us=50071 copied=43655168 inq=262144 space=39387136 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.240719: tcp:tcp_rcvbuf_grow: time=50197 rtt_us=50057 copied=43843584 inq=262144 space=43393024 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.341271: tcp:tcp_rcvbuf_grow: time=50297 rtt_us=50123 copied=43843584 inq=131072 space=43581440 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57147392 tcp_stream 50826 [010] 936.642503: tcp:tcp_rcvbuf_grow: time=50131 rtt_us=50084 copied=43843584 inq=0 space=43712512 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57262080 Fixes: 65c5287892e9 ("tcp: fix sk_rcvbuf overshoot") Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/589 Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-4-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 11 +++++++++-- net/mptcp/protocol.c | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb4e07f84ae2..e4a979b75cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -895,17 +895,24 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = tp->rcvq_space.space; tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; - /* slow start: allow the sender to double its rate. */ + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 052a0c62023f..875027b9319c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -198,15 +198,23 @@ static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = msk->rcvq_space.space; msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; From a4384d786e38d5ff172f0908aae01c2c30719245 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 21:38:41 +0530 Subject: [PATCH 0545/1075] nfp: xsk: fix memory leak in nfp_net_alloc() In nfp_net_alloc(), the memory allocated for xsk_pools is not freed in the subsequent error paths, leading to a memory leak. Fix that by freeing it in the error path. Fixes: 6402528b7a0b ("nfp: xsk: add AF_XDP zero-copy Rx and Tx support") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028160845.126919-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 132626a3f9f7..9ef72f294117 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2557,14 +2557,16 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, &nn->tlv_caps); if (err) - goto err_free_nn; + goto err_free_xsk_pools; err = nfp_ccm_mbox_alloc(nn); if (err) - goto err_free_nn; + goto err_free_xsk_pools; return nn; +err_free_xsk_pools: + kfree(nn->dp.xsk_pools); err_free_nn: if (nn->dp.netdev) free_netdev(nn->dp.netdev); From a43303809868b22bd1303739ba334e982b234d45 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Oct 2025 20:20:27 +0700 Subject: [PATCH 0546/1075] Documentation: netconsole: Remove obsolete contact people Breno Leitao has been listed in MAINTAINERS as netconsole maintainer since 7c938e438c56db ("MAINTAINERS: make Breno the netconsole maintainer"), but the documentation says otherwise that bug reports should be sent to original netconsole authors. Remove obsolate contact info. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251028132027.48102-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/netconsole.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 59cb9982afe6..2555e75e5cc1 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -19,9 +19,6 @@ Userdata append support by Matthew Wood , Jan 22 2024 Sysdata append support by Breno Leitao , Jan 15 2025 -Please send bug reports to Matt Mackall -Satyam Sharma , and Cong Wang - Introduction: ============= From 00764aa5c9bbb2044eb04d6d78584a436666b231 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 28 Oct 2025 15:06:32 -0700 Subject: [PATCH 0547/1075] netconsole: Fix race condition in between reader and writer of userdata The update_userdata() function constructs the complete userdata string in nt->extradata_complete and updates nt->userdata_length. This data is then read by write_msg() and write_ext_msg() when sending netconsole messages. However, update_userdata() was not holding target_list_lock during this process, allowing concurrent message transmission to read partially updated userdata. This race condition could result in netconsole messages containing incomplete or inconsistent userdata - for example, reading the old userdata_length with new extradata_complete content, or vice versa, leading to truncated or corrupted output. Fix this by acquiring target_list_lock with spin_lock_irqsave() before updating extradata_complete and userdata_length, and releasing it after both fields are fully updated. This ensures that readers see a consistent view of the userdata, preventing corruption during concurrent access. The fix aligns with the existing locking pattern used throughout the netconsole code, where target_list_lock protects access to target fields including buf[] and msgcounter that are accessed during message transmission. Also get rid of the unnecessary variable complete_idx, which makes it easier to bail out of update_userdata(). Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251028-netconsole-fix-race-v4-1-63560b0ae1a0@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 194570443493..5d8d0214786c 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -886,8 +886,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf) static void update_userdata(struct netconsole_target *nt) { - int complete_idx = 0, child_count = 0; struct list_head *entry; + int child_count = 0; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; @@ -897,8 +900,11 @@ static void update_userdata(struct netconsole_target *nt) struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) - break; + if (child_count >= MAX_EXTRADATA_ITEMS) { + spin_unlock_irqrestore(&target_list_lock, flags); + WARN_ON_ONCE(1); + return; + } child_count++; item = container_of(entry, struct config_item, ci_entry); @@ -912,12 +918,11 @@ static void update_userdata(struct netconsole_target *nt) * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->extradata_complete[complete_idx], - MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", - item->ci_name, udm_item->value); + nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", + item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->extradata_complete, - sizeof(nt->extradata_complete)); + spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, From 27b0e701d3872ba59c5b579a9e8a02ea49ad3d3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:52 +0100 Subject: [PATCH 0548/1075] mptcp: drop bogus optimization in __mptcp_check_push() Accessing the transmit queue without owning the msk socket lock is inherently racy, hence __mptcp_check_push() could actually quit early even when there is pending data. That in turn could cause unexpected tx lock and timeout. Dropping the early check avoids the race, implicitly relaying on later tests under the relevant lock. With such change, all the other mptcp_send_head() call sites are now under the msk socket lock and we can additionally drop the now unneeded annotation on the transmit head pointer accesses. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-1-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++------- net/mptcp/protocol.h | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 875027b9319c..655a2a45224f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1007,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk) if (WARN_ON_ONCE(!msk->recovery)) break; - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); } dfrag_clear(sk, dfrag); @@ -1552,7 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, mptcp_update_post_push(msk, dfrag, ret); } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); if (msk->snd_burst <= 0 || !sk_stream_memory_free(ssk) || @@ -1912,7 +1912,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) get_page(dfrag->page); list_add_tail(&dfrag->list, &msk->rtx_queue); if (!msk->first_pending) - WRITE_ONCE(msk->first_pending, dfrag); + msk->first_pending = dfrag; } pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, @@ -2882,7 +2882,7 @@ static void __mptcp_clear_xmit(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - WRITE_ONCE(msk->first_pending, NULL); + msk->first_pending = NULL; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); } @@ -3422,9 +3422,6 @@ void __mptcp_data_acked(struct sock *sk) void __mptcp_check_push(struct sock *sk, struct sock *ssk) { - if (!mptcp_send_head(sk)) - return; - if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk, false); else diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 52f9cfa4ce95..379a88e14e8d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); - return READ_ONCE(msk->first_pending); + return msk->first_pending; } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) From 8e04ce45a8db7a080220e86e249198fa676b83dc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:53 +0100 Subject: [PATCH 0549/1075] mptcp: fix MSG_PEEK stream corruption If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the receive queue and recvmsg() need to waits for more data - i.e. it's a blocking one - upon arrival of the next packet the MPTCP protocol will start again copying the oldest data present in the receive queue, corrupting the data stream. Address the issue explicitly tracking the peeked sequence number, restarting from the last peeked byte. Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 655a2a45224f..2535788569ab 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1945,22 +1945,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct sock *sk, - struct msghdr *msg, - size_t len, int flags, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; + int total_data_len = 0; int copied = 0; skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { - u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 delta, offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; - u32 count = min_t(size_t, len - copied, data_len); + u32 count; int err; + if (flags & MSG_PEEK) { + /* skip already peeked skbs */ + if (total_data_len + data_len <= copied_total) { + total_data_len += data_len; + continue; + } + + /* skip the already peeked data in the current skb */ + delta = copied_total - total_data_len; + offset += delta; + data_len -= delta; + } + + count = min_t(size_t, len - copied, data_len); if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_msg(skb, offset, msg, count); if (unlikely(err < 0)) { @@ -1977,16 +1991,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, copied += count; - if (count < data_len) { - if (!(flags & MSG_PEEK)) { + if (!(flags & MSG_PEEK)) { + msk->bytes_consumed += count; + if (count < data_len) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; - msk->bytes_consumed += count; + break; } - break; - } - if (!(flags & MSG_PEEK)) { /* avoid the indirect call, we know the destructor is sock_rfree */ skb->destructor = NULL; skb->sk = NULL; @@ -1994,7 +2006,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, sk_mem_uncharge(sk, skb->truesize); __skb_unlink(skb, &sk->sk_receive_queue); skb_attempt_defer_free(skb); - msk->bytes_consumed += count; } if (copied >= len) @@ -2191,7 +2202,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, + copied, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; From a824084b98d8a1dbd6e85d0842a8eb5e73467f59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:54 +0100 Subject: [PATCH 0550/1075] mptcp: restore window probe Since commit 72377ab2d671 ("mptcp: more conservative check for zero probes") the MPTCP-level zero window probe check is always disabled, as the TCP-level write queue always contains at least the newly allocated skb. Refine the relevant check tacking in account that the above condition and that such skb can have zero length. Fixes: 72377ab2d671 ("mptcp: more conservative check for zero probes") Cc: stable@vger.kernel.org Reported-by: Geliang Tang Closes: https://lore.kernel.org/d0a814c364e744ca6b836ccd5b6e9146882e8d42.camel@kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-3-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2535788569ab..5d8714adae6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1299,7 +1299,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) { + /* No need for zero probe if there are any data pending + * either at the msk or ssk level; skb is the current write + * queue tail and can be empty at this point. + */ + if (snd_una != msk->snd_nxt || skb->len || + skb != tcp_send_head(ssk)) { tcp_remove_empty_skb(ssk); return 0; } From fe11dfa10919ce594682c76f5f648a0840d80a2b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:55 +0100 Subject: [PATCH 0551/1075] mptcp: zero window probe mib Explicitly account for MPTCP-level zero windows probe, to catch hopefully earlier issues alike the one addressed by the previous patch. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-4-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 1 + net/mptcp/protocol.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 6003e47c770a..171643815076 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK), SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK), SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED), + SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE), }; /* mptcp_mib_alloc - allocate percpu mib counters diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 309bac6fea32..a1d3e9369fbb 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -88,6 +88,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */ MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */ MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */ + MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5d8714adae6c..2d6b8de35c44 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1355,6 +1355,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext->dsn64); if (zero_window_probe) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) From dc89548c6926d68dfdda11bebc1a5258bc41d887 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 00:43:16 +0800 Subject: [PATCH 0552/1075] net: usb: asix_devices: Check return value of usbnet_get_endpoints The code did not check the return value of usbnet_get_endpoints. Add checks and return the error if it fails to transfer the error. Found via static anlaysis and this is similar to commit 07161b2416f7 ("sr9800: Add check for usbnet_get_endpoints"). Fixes: 933a27d39e0e ("USB: asix - Add AX88178 support and many other changes") Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Link: https://patch.msgid.link/20251026164318.57624-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 85bd5d845409..232bbd79a4de 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) int i; unsigned long gpio_bits = dev->driver_info->data; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { @@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) { @@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) int ret; u8 buf[ETH_ALEN] = {0}; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); From 53110232c95ff56067fd96c75a1a1c53d10dcd98 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Sun, 26 Oct 2025 22:20:19 +0200 Subject: [PATCH 0553/1075] net/mlx5: Don't zero user_count when destroying FDB tables esw->user_count tracks how many TC rules are added on an esw via mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) esw.user_count was unconditionally set to 0 in esw_destroy_legacy_fdb_table and esw_destroy_offloads_fdb_tables. These two together can lead to the following sequence of events: 1. echo 1 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -...-> esw_create_legacy_table -> atomic64_set(&esw->user_count, 0) 2. tc qdisc add dev eth2 ingress && \ tc filter replace dev eth2 pref 1 protocol ip chain 0 ingress \ handle 1 flower action ct nat zone 64000 pipe - mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) 3. echo 0 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -..-> esw_destroy_legacy_fdb_table -> atomic64_set(&esw->user_count, 0) 4. devlink dev eswitch set pci/0000:08:00.0 mode switchdev - mlx5_devlink_eswitch_mode_set -> mlx5_esw_try_lock -> atomic64_read(&esw->user_count) == 0 - then proceed to a WARN_ON in: esw_offloads_start -> mlx5_eswitch_enable_locke -> esw_offloads_enable -> mlx5_esw_offloads_rep_load -> mlx5e_vport_rep_load -> mlx5e_netdev_change_profile -> mlx5e_detach_netdev -> mlx5e_cleanup_nic_rx -> mlx5e_tc_nic_cleanup -> mlx5e_mod_hdr_tbl_destroy Fix this by not clearing out the user_count when destroying FDB tables, so that the check in mlx5_esw_try_lock can prevent the mode change when there are TC rules configured, as originally intended. Fixes: 2318b8bb94a3 ("net/mlx5: E-switch, Destroy legacy fdb table when needed") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761510019-938772-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 76382626ad41..929adeb50a98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -66,7 +66,6 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; - atomic64_set(&esw->user_count, 0); } static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 34749814f19b..44a142a041b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1978,7 +1978,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); - atomic64_set(&esw->user_count, 0); } static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) From da2522df3fcc6f57068470cbdcd6516d9eb76b37 Mon Sep 17 00:00:00 2001 From: Jinliang Wang Date: Sun, 26 Oct 2025 23:55:30 -0700 Subject: [PATCH 0554/1075] net: mctp: Fix tx queue stall The tx queue can become permanently stuck in a stopped state due to a race condition between the URB submission path and its completion callback. The URB completion callback can run immediately after usb_submit_urb() returns, before the submitting function calls netif_stop_queue(). If this occurs, the queue state management becomes desynchronized, leading to a stall where the queue is never woken. Fix this by moving the netif_stop_queue() call to before submitting the URB. This closes the race window by ensuring the network stack is aware the queue is stopped before the URB completion can possibly run. Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jinliang Wang Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20251027065530.2045724-1-jinliangw@google.com Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-usb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 36ccc53b1797..ef860cfc629f 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -96,11 +96,13 @@ static netdev_tx_t mctp_usb_start_xmit(struct sk_buff *skb, skb->data, skb->len, mctp_usb_out_complete, skb); + /* Stops TX queue first to prevent race condition with URB complete */ + netif_stop_queue(dev); rc = usb_submit_urb(urb, GFP_ATOMIC); - if (rc) + if (rc) { + netif_wake_queue(dev); goto err_drop; - else - netif_stop_queue(dev); + } return NETDEV_TX_OK; From 9311e9540a8b406d9f028aa87fb072a3819d4c82 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 27 Oct 2025 17:57:10 +0800 Subject: [PATCH 0555/1075] selftests: net: use BASH for bareudp testing In bareudp.sh, this script uses /bin/sh and it will load another lib.sh BASH script at the very beginning. But on some operating systems like Ubuntu, /bin/sh is actually pointed to DASH, thus it will try to run BASH commands with DASH and consequently leads to syntax issues: # ./bareudp.sh: 4: ./lib.sh: Bad substitution # ./bareudp.sh: 5: ./lib.sh: source: not found # ./bareudp.sh: 24: ./lib.sh: Syntax error: "(" unexpected Fix this by explicitly using BASH for bareudp.sh. This fixes test execution failures on systems where /bin/sh is not BASH. Reported-by: Edoardo Canepa Link: https://bugs.launchpad.net/bugs/2129812 Signed-off-by: Po-Hsu Lin Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20251027095710.2036108-2-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bareudp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index 4046131e7888..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. From 298574936a6c4ebbe655e15d971ddb1a96c7dc0b Mon Sep 17 00:00:00 2001 From: Thanh Quan Date: Mon, 27 Oct 2025 15:02:43 +0100 Subject: [PATCH 0556/1075] net: phy: dp83869: fix STRAP_OPMODE bitmask According to the TI DP83869HM datasheet Revision D (June 2025), section 7.6.1.41 STRAP_STS Register, the STRAP_OPMODE bitmask is bit [11:9]. Fix this. In case the PHY is auto-detected via PHY ID registers, or not described in DT, or, in case the PHY is described in DT but the optional DT property "ti,op-mode" is not present, then the driver reads out the PHY functional mode (RGMII, SGMII, ...) from hardware straps. Currently, all upstream users of this PHY specify both DT compatible string "ethernet-phy-id2000.a0f1" and ti,op-mode = property, therefore it seems no upstream users are affected by this bug. The driver currently interprets bits [2:0] of STRAP_STS register as PHY functional mode. Those bits are controlled by ANEG_DIS, ANEGSEL_0 straps and an always-zero reserved bit. Systems that use RGMII-to-Copper functional mode are unlikely to disable auto-negotiation via ANEG_DIS strap, or change auto-negotiation behavior via ANEGSEL_0 strap. Therefore, even with this bug in place, the STRAP_STS register content is likely going to be interpreted by the driver as RGMII-to-Copper mode. However, for a system with PHY functional mode strapping set to other mode than RGMII-to-Copper, the driver is likely to misinterpret the strapping as RGMII-to-Copper and misconfigure the PHY. For example, on a system with SGMII-to-Copper strapping, the STRAP_STS register reads as 0x0c20, but the PHY ends up being configured for incompatible RGMII-to-Copper mode. Fixes: 0eaf8ccf2047 ("net: phy: dp83869: Set opmode from straps") Reviewed-by: Andrew Lunn Signed-off-by: Thanh Quan Signed-off-by: Hai Pham Signed-off-by: Marek Vasut # Port from U-Boot to Linux Link: https://patch.msgid.link/20251027140320.8996-1-marek.vasut+renesas@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83869.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index a2cd1cc35cde..1f381d7b13ff 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -84,7 +84,7 @@ #define DP83869_CLK_DELAY_DEF 7 /* STRAP_STS1 bits */ -#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0) +#define DP83869_STRAP_OP_MODE_MASK GENMASK(11, 9) #define DP83869_STRAP_STS1_RESERVED BIT(11) #define DP83869_STRAP_MIRROR_ENABLED BIT(12) @@ -528,7 +528,7 @@ static int dp83869_set_strapped_mode(struct phy_device *phydev) if (val < 0) return val; - dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK; + dp83869->mode = FIELD_GET(DP83869_STRAP_OP_MODE_MASK, val); return 0; } From fac56c4651ae95f3f2b468c2cf1884cf0e6d18c1 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 23 Oct 2025 18:59:47 -0300 Subject: [PATCH 0557/1075] smb: client: handle lack of IPC in dfs_cache_refresh() In very rare cases, DFS mounts could end up with SMB sessions without any IPC connections. These mounts are only possible when having unexpired cached DFS referrals, hence not requiring any IPC connections during the mount process. Try to establish those missing IPC connections when refreshing DFS referrals. If the server is still rejecting it, then simply ignore and leave expired cached DFS referral for any potential DFS failovers. Reported-by: Jay Shin Signed-off-by: Paulo Alcantara (Red Hat) Cc: David Howells Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 2 ++ fs/smb/client/connect.c | 38 ++++++++++++--------------- fs/smb/client/dfs_cache.c | 55 +++++++++++++++++++++++++++++++++------ 3 files changed, 66 insertions(+), 29 deletions(-) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index fb1813cbe0eb..3528c365a452 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -616,6 +616,8 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, extern struct TCP_Server_Info * cifs_find_tcp_session(struct smb3_fs_context *ctx); +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal); + void __cifs_put_smb_ses(struct cifs_ses *ses); extern struct cifs_ses * diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index dd12f3eb61dc..d65ab7e4b1c2 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2015,39 +2015,31 @@ static int match_session(struct cifs_ses *ses, /** * cifs_setup_ipc - helper to setup the IPC tcon for the session * @ses: smb session to issue the request on - * @ctx: the superblock configuration context to use for building the - * new tree connection for the IPC (interprocess communication RPC) + * @seal: if encryption is requested * * A new IPC connection is made and stored in the session * tcon_ipc. The IPC tcon has the same lifetime as the session. */ -static int -cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal) { int rc = 0, xid; struct cifs_tcon *tcon; char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; - bool seal = false; struct TCP_Server_Info *server = ses->server; /* * If the mount request that resulted in the creation of the * session requires encryption, force IPC to be encrypted too. */ - if (ctx->seal) { - if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) - seal = true; - else { - cifs_server_dbg(VFS, - "IPC: server doesn't support encryption\n"); - return -EOPNOTSUPP; - } + if (seal && !(server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) { + cifs_server_dbg(VFS, "IPC: server doesn't support encryption\n"); + return ERR_PTR(-EOPNOTSUPP); } /* no need to setup directory caching on IPC share, so pass in false */ tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc); if (tcon == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock(&server->srv_lock); scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname); @@ -2057,13 +2049,13 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->ses = ses; tcon->ipc = true; tcon->seal = seal; - rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls); + rc = server->ops->tree_connect(xid, ses, unc, tcon, ses->local_nls); free_xid(xid); if (rc) { - cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); + cifs_server_dbg(VFS | ONCE, "failed to connect to IPC (rc=%d)\n", rc); tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail); - goto out; + return ERR_PTR(rc); } cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid); @@ -2071,9 +2063,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) spin_lock(&tcon->tc_lock); tcon->status = TID_GOOD; spin_unlock(&tcon->tc_lock); - ses->tcon_ipc = tcon; -out: - return rc; + return tcon; } static struct cifs_ses * @@ -2347,6 +2337,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; + struct cifs_tcon *ipc; struct cifs_ses *ses; unsigned int xid; int retries = 0; @@ -2525,7 +2516,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - cifs_setup_ipc(ses, ctx); + ipc = cifs_setup_ipc(ses, ctx->seal); + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + ses->tcon_ipc = !IS_ERR(ipc) ? ipc : NULL; + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); free_xid(xid); diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 4dada26d56b5..f2ad0ccd08a7 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1120,24 +1120,63 @@ static bool target_share_equal(struct cifs_tcon *tcon, const char *s1) return match; } -static bool is_ses_good(struct cifs_ses *ses) +static bool is_ses_good(struct cifs_tcon *tcon, struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; - struct cifs_tcon *tcon = ses->tcon_ipc; + struct cifs_tcon *ipc = NULL; bool ret; + spin_lock(&cifs_tcp_ses_lock); spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); + ret = !cifs_chan_needs_reconnect(ses, server) && - ses->ses_status == SES_GOOD && - !tcon->need_reconnect; + ses->ses_status == SES_GOOD; + spin_unlock(&ses->chan_lock); + + if (!ret) + goto out; + + if (likely(ses->tcon_ipc)) { + if (ses->tcon_ipc->need_reconnect) { + ret = false; + goto out; + } + } else { + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + + ipc = cifs_setup_ipc(ses, tcon->seal); + + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + if (!IS_ERR(ipc)) { + if (!ses->tcon_ipc) { + ses->tcon_ipc = ipc; + ipc = NULL; + } + } else { + ret = false; + ipc = NULL; + } + } + +out: spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + if (ipc && server->ops->tree_disconnect) { + unsigned int xid = get_xid(); + + (void)server->ops->tree_disconnect(xid, ipc); + _free_xid(xid); + } + tconInfoFree(ipc, netfs_trace_tcon_ref_free_ipc); return ret; } /* Refresh dfs referral of @ses */ -static void refresh_ses_referral(struct cifs_ses *ses) +static void refresh_ses_referral(struct cifs_tcon *tcon, struct cifs_ses *ses) { struct cache_entry *ce; unsigned int xid; @@ -1153,7 +1192,7 @@ static void refresh_ses_referral(struct cifs_ses *ses) } ses = CIFS_DFS_ROOT_SES(ses); - if (!is_ses_good(ses)) { + if (!is_ses_good(tcon, ses)) { cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; @@ -1241,7 +1280,7 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) up_read(&htable_rw_lock); ses = CIFS_DFS_ROOT_SES(ses); - if (!is_ses_good(ses)) { + if (!is_ses_good(tcon, ses)) { cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", __func__); goto out; @@ -1309,7 +1348,7 @@ void dfs_cache_refresh(struct work_struct *work) tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); list_for_each_entry(ses, &tcon->dfs_ses_list, dlist) - refresh_ses_referral(ses); + refresh_ses_referral(tcon, ses); refresh_tcon_referral(tcon, false); queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, From 895ad6f7083b0c9f1902b23b84136298a492cbeb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Tue, 28 Oct 2025 18:43:46 +0100 Subject: [PATCH 0558/1075] smb: client: call smbd_destroy() in the same splace as kernel_sock_shutdown()/sock_release() With commit b0432201a11b ("smb: client: let destroy_mr_list() keep smbdirect_mr_io memory if registered") the changes from commit 214bab448476 ("cifs: Call MID callback before destroying transport") and commit 1d2a4f57cebd ("cifs:smbd When reconnecting to server, call smbd_destroy() after all MIDs have been called") are no longer needed. And it's better to use the same logic flow, so that the chance of smbdirect related problems is smaller. Fixes: 214bab448476 ("cifs: Call MID callback before destroying transport") Fixes: 1d2a4f57cebd ("cifs:smbd When reconnecting to server, call smbd_destroy() after all MIDs have been called") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/connect.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d65ab7e4b1c2..55cb4b0cbd48 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -310,6 +310,8 @@ cifs_abort_connection(struct TCP_Server_Info *server) server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; + } else if (cifs_rdma_enabled(server)) { + smbd_destroy(server); } server->sequence_number = 0; server->session_estab = false; @@ -338,12 +340,6 @@ cifs_abort_connection(struct TCP_Server_Info *server) mid_execute_callback(mid); release_mid(mid); } - - if (cifs_rdma_enabled(server)) { - cifs_server_lock(server); - smbd_destroy(server); - cifs_server_unlock(server); - } } static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets) From 34892cfec0c2d96787c4be7bda0d5f18d7dacf85 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:01 +0200 Subject: [PATCH 0559/1075] net: tls: Change async resync helpers argument Update tls_offload_rx_resync_async_request_start() and tls_offload_rx_resync_async_request_end() to get a struct tls_offload_resync_async parameter directly, rather than extracting it from struct sock. This change aligns the function signatures with the upcoming tls_offload_rx_resync_async_request_cancel() helper, which will be introduced in a subsequent patch. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 9 ++++++-- include/net/tls.h | 21 +++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d7a11ff9bbdb..5fbc92269585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -425,12 +425,14 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); + rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; @@ -447,7 +449,8 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); @@ -482,6 +485,7 @@ static bool resync_queue_get_psv(struct sock *sk) static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tls_offload_resync_async *resync_async; struct net_device *netdev = rq->netdev; struct net *net = dev_net(netdev); struct sock *sk = NULL; @@ -527,7 +531,8 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) seq = th->seq; datalen = skb->len - depth; - tls_offload_rx_resync_async_request_start(sk, seq, datalen); + resync_async = tls_offload_ctx_rx(tls_get_ctx(sk))->resync_async; + tls_offload_rx_resync_async_request_start(resync_async, seq, datalen); rq->stats->tls_resync_req_start++; unref: diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b69..b90f3b675c3c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -451,25 +451,20 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) /* Log all TLS record header TCP sequences in [seq, seq+len] */ static inline void -tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async, + __be32 seq, u16 len) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); - rx_ctx->resync_async->loglen = 0; - rx_ctx->resync_async->rcd_delta = 0; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; } static inline void -tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async, + __be32 seq) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, - ((u64)ntohl(seq) << 32) | RESYNC_REQ); + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } static inline void From c15d5c62ab313c19121f10e25d4fec852bd1c40c Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:02 +0200 Subject: [PATCH 0560/1075] net: tls: Cancel RX async resync request on rcd_delta overflow When a netdev issues a RX async resync request for a TLS connection, the TLS module handles it by logging record headers and attempting to match them to the tcp_sn provided by the device. If a match is found, the TLS module approves the tcp_sn for resynchronization. While waiting for a device response, the TLS module also increments rcd_delta each time a new TLS record is received, tracking the distance from the original resync request. However, if the device response is delayed or fails (e.g due to unstable connection and device getting out of tracking, hardware errors, resource exhaustion etc.), the TLS module keeps logging and incrementing, which can lead to a WARN() when rcd_delta exceeds the threshold. To address this, introduce tls_offload_rx_resync_async_request_cancel() to explicitly cancel resync requests when a device response failure is detected. Call this helper also as a final safeguard when rcd_delta crosses its threshold, as reaching this point implies that earlier cancellation did not occur. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ net/tls/tls_device.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index b90f3b675c3c..c7bcdb3afad7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -467,6 +467,12 @@ tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } +static inline void +tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async) +{ + atomic64_set(&resync_async->req, 0); +} + static inline void tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a64ae15b1a60..71734411ff4c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -723,8 +723,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, /* shouldn't get to wraparound: * too long in async stage, something bad happened */ - if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) { + tls_offload_rx_resync_async_request_cancel(resync_async); return false; + } /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request From 426e9da3b28404b1edcbae401231fb378150d99d Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:03 +0200 Subject: [PATCH 0561/1075] net/mlx5e: kTLS, Cancel RX async resync request in error flows When device loses track of TLS records, it attempts to resync by monitoring records and requests an asynchronous resynchronization from software for this TLS connection. The TLS module handles such device RX resync requests by logging record headers and comparing them with the record tcp_sn when provided by the device. It also increments rcd_delta to track how far the current record tcp_sn is from the tcp_sn of the original resync request. If the device later responds with a matching tcp_sn, the TLS module approves the tcp_sn for resync. However, the device response may be delayed or never arrive, particularly due to traffic-related issues such as packet drops or reordering. In such cases, the TLS module remains unaware that resync will not complete, and continues performing unnecessary work by logging headers and incrementing rcd_delta, which can eventually exceed the threshold and trigger a WARN(). For example, this was observed when the device got out of tracking, causing mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading to the rcd_delta warning. To address this, call tls_offload_rx_resync_async_request_cancel() to cancel the resync request and stop resync tracking in such error cases. Also, increment the tls_resync_req_skip counter to track these cancellations. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 34 ++++++++++++++++--- .../mellanox/mlx5/core/en_accel/ktls_txrx.h | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 5fbc92269585..da2d1eb52c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -320,7 +320,6 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, err_free: kfree(buf); err_out: - priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { mlx5e_ktls_priv_rx_put(priv_rx); + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); return; } c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - if (resync_post_get_progress_params(sq, priv_rx)) + if (resync_post_get_progress_params(sq, priv_rx)) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); mlx5e_ktls_priv_rx_put(priv_rx); + } } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -425,6 +429,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_resync_async *async_resync; struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; @@ -433,8 +438,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); - if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + async_resync = rx_ctx->resync_async; + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; + } dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -445,11 +454,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + tls_offload_rx_resync_async_request_end(async_resync, cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: @@ -475,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk) resync = &priv_rx->resync; mlx5e_ktls_priv_rx_get(priv_rx); - if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) { mlx5e_ktls_priv_rx_put(priv_rx); + return false; + } return true; } @@ -561,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, resync_handle_seq_match(priv_rx, c); } +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_buf *buf; + + buf = wi->tls_get_params.buf; + priv_rx = buf->priv_rx; + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core); +} + /* End of resync section */ void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index f87b65c560ea..cb08799769ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); + +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi); + static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c79adc51a04..26621a2972ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1036,6 +1036,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) netdev_WARN_ONCE(cq->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); +#ifdef CONFIG_MLX5_EN_TLS + if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS) + mlx5e_ktls_rx_resync_async_request_cancel(wi); +#endif mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); From c657f86106c8729240e1f50a62c6606b578ecf20 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:43 +0800 Subject: [PATCH 0562/1075] net: stmmac: vlan: Disable 802.1AD tag insertion offload The DWMAC IP's VLAN tag insertion offload does not support inserting STAG (802.1AD) and CTAG (802.1Q) types in bytes 13 and 14 using the same MAC_VLAN_Incl and MAC_VLAN_Inner_Incl register configurations. Currently, MAC_VLAN_Incl is configured to offload only STAG type insertion. However, the DWMAC IP inserts a CTAG type when the inner VLAN ID field of the descriptor is not configured, and a STAG type when it is configured. This behavior is not documented and leads to inconsistent double VLAN tagging. Additionally, an unexpected CTAG with VLAN ID 0 is inserted, resulting in frames like: Frame 1: 110 bytes on wire (880 bits), 110 bytes captured (880 bits) Ethernet II, Src: (), Dst: () IEEE 802.1ad, ID: 100 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 0 (unexpected) 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 200 Internet Protocol Version 4, Src: 192.168.4.10, Dst: 192.168.4.11 Internet Control Message Protocol To avoid this undocumented and incorrect behavior, disable 802.1AD tag insertion offload. Also, don't set CSVL bit. As per the data book, when this bit is set, S-VLAN type (0x88A8) is inserted in the 13th and 14th bytes of transmitted packets and when this bit is reset, C-VLAN type (0x8100) is inserted in the 13th and 14th bytes of transmitted packets. Fixes: 30d932279dc2 ("net: stmmac: Add support for VLAN Insertion Offload") Fixes: e94e3f3b51ce ("net: stmmac: Add support for VLAN Insertion Offload in GMAC4+") Fixes: 1d2c7a5fee31 ("net: stmmac: Refactor VLAN implementation") Signed-off-by: Rohan G Thomas Reviewed-by: Boon Khai Ng Link: https://patch.msgid.link/20251028-qbv-fixes-v4-1-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 ++++-------------- .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 2 +- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 650d75b73e0b..5b452469ad2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4089,18 +4089,11 @@ static int stmmac_release(struct net_device *dev) static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, struct stmmac_tx_queue *tx_q) { - u16 tag = 0x0, inner_tag = 0x0; - u32 inner_type = 0x0; struct dma_desc *p; + u16 tag = 0x0; - if (!priv->dma_cap.vlins) + if (!priv->dma_cap.vlins || !skb_vlan_tag_present(skb)) return false; - if (!skb_vlan_tag_present(skb)) - return false; - if (skb->vlan_proto == htons(ETH_P_8021AD)) { - inner_tag = skb_vlan_tag_get(skb); - inner_type = STMMAC_VLAN_INSERT; - } tag = skb_vlan_tag_get(skb); @@ -4109,7 +4102,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, else p = &tx_q->dma_tx[tx_q->cur_tx]; - if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) + if (stmmac_set_desc_vlan_tag(priv, p, tag, 0x0, 0x0)) return false; stmmac_set_tx_owner(priv, p); @@ -7573,11 +7566,8 @@ int stmmac_dvr_probe(struct device *device, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; } - if (priv->dma_cap.vlins) { + if (priv->dma_cap.vlins) ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; - if (priv->dma_cap.dvlan) - ndev->features |= NETIF_F_HW_VLAN_STAG_TX; - } #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index 0b6f6228ae35..ff02a79c00d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -212,7 +212,7 @@ static void vlan_enable(struct mac_device_info *hw, u32 type) value = readl(ioaddr + VLAN_INCL); value |= VLAN_VLTI; - value |= VLAN_CSVL; /* Only use SVLAN */ + value &= ~VLAN_CSVL; /* Only use CVLAN */ value &= ~VLAN_VLC; value |= (type << VLAN_VLC_SHIFT) & VLAN_VLC; writel(value, ioaddr + VLAN_INCL); From ded9813d17d3dd50a08e7a2ca1495769ef9c6673 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:44 +0800 Subject: [PATCH 0563/1075] net: stmmac: Consider Tx VLAN offload tag length for maxSDU Queue maxSDU requirement of 802.1 Qbv standard requires mac to drop packets that exceeds maxSDU length and maxSDU doesn't include preamble, destination and source address, or FCS but includes ethernet type and VLAN header. On hardware with Tx VLAN offload enabled, VLAN header length is not included in the skb->len, when Tx VLAN offload is requested. This leads to incorrect length checks and allows transmission of oversized packets. Add the VLAN_HLEN to the skb->len before checking the Qbv maxSDU if Tx VLAN offload is requested for the packet. Fixes: c5c3e1bfc9e0 ("net: stmmac: Offload queueMaxSDU from tc-taprio") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-2-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b452469ad2c..7b90ecd3a55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4500,6 +4500,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) bool has_vlan, set_ic; int entry, first_tx; dma_addr_t des; + u32 sdu_len; tx_q = &priv->dma_conf.tx_queue[queue]; txq_stats = &priv->xstats.txq_stats[queue]; @@ -4517,10 +4518,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } if (priv->est && priv->est->enable && - priv->est->max_sdu[queue] && - skb->len > priv->est->max_sdu[queue]){ - priv->xstats.max_sdu_txq_drop[queue]++; - goto max_sdu_err; + priv->est->max_sdu[queue]) { + sdu_len = skb->len; + /* Add VLAN tag length if VLAN tag insertion offload is requested */ + if (priv->dma_cap.vlins && skb_vlan_tag_present(skb)) + sdu_len += VLAN_HLEN; + if (sdu_len > priv->est->max_sdu[queue]) { + priv->xstats.max_sdu_txq_drop[queue]++; + goto max_sdu_err; + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { From 48b2e323c018c4c908ae5acabff326647bab5240 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:45 +0800 Subject: [PATCH 0564/1075] net: stmmac: est: Fix GCL bounds checks Fix the bounds checks for the hw supported maximum GCL entry count and gate interval time. Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-3-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 97e89a604abd..3b4d4696afe9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -981,7 +981,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; - if (qopt->num_entries >= dep) + if (qopt->num_entries > dep) return -EINVAL; if (!qopt->cycle_time) return -ERANGE; @@ -1012,7 +1012,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, s64 delta_ns = qopt->entries[i].interval; u32 gates = qopt->entries[i].gate_mask; - if (delta_ns > GENMASK(wid, 0)) + if (delta_ns > GENMASK(wid - 1, 0)) return -ERANGE; if (gates > GENMASK(31 - wid, 0)) return -ERANGE; From 6a2108c78069fda000729b88c97b1eba0405e6d7 Mon Sep 17 00:00:00 2001 From: Shivaji Kant Date: Wed, 29 Oct 2025 06:54:19 +0000 Subject: [PATCH 0565/1075] net: devmem: refresh devmem TX dst in case of route invalidation The zero-copy Device Memory (Devmem) transmit path relies on the socket's route cache (`dst_entry`) to validate that the packet is being sent via the network device to which the DMA buffer was bound. However, this check incorrectly fails and returns `-ENODEV` if the socket's route cache entry (`dst`) is merely missing or expired (`dst == NULL`). This scenario is observed during network events, such as when flow steering rules are deleted, leading to a temporary route cache invalidation. This patch fixes -ENODEV error for `net_devmem_get_binding()` by doing the following: 1. It attempts to rebuild the route via `rebuild_header()` if the route is initially missing (`dst == NULL`). This allows the TCP/IP stack to recover from transient route cache misses. 2. It uses `rcu_read_lock()` and `dst_dev_rcu()` to safely access the network device pointer (`dst_dev`) from the route, preventing use-after-free conditions if the device is concurrently removed. 3. It maintains the critical safety check by validating that the retrieved destination device (`dst_dev`) is exactly the device registered in the Devmem binding (`binding->dev`). These changes prevent unnecessary ENODEV failures while maintaining the critical safety requirement that the Devmem resources are only used on the bound network device. Reviewed-by: Bobby Eshleman Reported-by: Eric Dumazet Reported-by: Vedant Mathur Suggested-by: Eric Dumazet Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Shivaji Kant Link: https://patch.msgid.link/20251029065420.3489943-1-shivajikant@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index d9de31a6cc7f..1d04754bc756 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "devmem.h" @@ -357,7 +358,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) { struct net_devmem_dmabuf_binding *binding; - struct dst_entry *dst = __sk_dst_get(sk); + struct net_device *dst_dev; + struct dst_entry *dst; int err = 0; binding = net_devmem_lookup_dmabuf(dmabuf_id); @@ -366,16 +368,35 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, goto out_err; } + rcu_read_lock(); + dst = __sk_dst_get(sk); + /* If dst is NULL (route expired), attempt to rebuild it. */ + if (unlikely(!dst)) { + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) { + err = -EHOSTUNREACH; + goto out_unlock; + } + dst = __sk_dst_get(sk); + if (unlikely(!dst)) { + err = -ENODEV; + goto out_unlock; + } + } + /* The dma-addrs in this binding are only reachable to the corresponding * net_device. */ - if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) { + dst_dev = dst_dev_rcu(dst); + if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { err = -ENODEV; - goto out_err; + goto out_unlock; } + rcu_read_unlock(); return binding; +out_unlock: + rcu_read_unlock(); out_err: if (binding) net_devmem_dmabuf_binding_put(binding); From 39c89ee6e9c4464eb366f4e594379454a6c4db39 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:18 +0100 Subject: [PATCH 0566/1075] compiler_types: Introduce __nocfi_generic There are two different ways that LLVM can expand kCFI operand bundles in LLVM IR: generically in the middle end or using an architecture specific sequence when lowering LLVM IR to machine code in the backend. The generic pass allows any architecture to take advantage of kCFI but the expansion of these bundles in the middle end can mess with optimizations that may turn indirect calls into direct calls when the call target is known at compile time, such as after inlining. Add __nocfi_generic, dependent on an architecture selecting CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS, to disable kCFI bundle generation in functions where only the generic kCFI pass may cause problems. Link: https://github.com/ClangBuiltLinux/linux/issues/2124 Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-1-ec57221153ae@kernel.org Signed-off-by: Kees Cook --- arch/Kconfig | 7 +++++++ include/linux/compiler_types.h | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 74ff01133532..61130b88964b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -917,6 +917,13 @@ config ARCH_USES_CFI_TRAPS An architecture should select this option if it requires the .kcfi_traps section for KCFI trap handling. +config ARCH_USES_CFI_GENERIC_LLVM_PASS + bool + help + An architecture should select this option if it uses the generic + KCFIPass in LLVM to expand kCFI bundles instead of architecture-specific + lowering. + config CFI bool "Use Kernel Control Flow Integrity (kCFI)" default CFI_CLANG diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 59288a2c1ad2..1414be493738 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -461,6 +461,12 @@ struct ftrace_likely_data { # define __nocfi #endif +#if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS) +# define __nocfi_generic __nocfi +#else +# define __nocfi_generic +#endif + /* * Any place that could be marked with the "alloc_size" attribute is also * a place to be marked with the "malloc" attribute, except those that may From 1ed9e6b1004f786d036eaabffb4ee6db9405e117 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:19 +0100 Subject: [PATCH 0567/1075] ARM: Select ARCH_USES_CFI_GENERIC_LLVM_PASS Prior to clang 22.0.0 [1], ARM did not have an architecture specific kCFI bundle lowering in the backend, which may cause issues. Select CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS to enable use of __nocfi_generic. Link: https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de [1] Link: https://github.com/ClangBuiltLinux/linux/issues/2124 Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-2-ec57221153ae@kernel.org Signed-off-by: Kees Cook --- arch/arm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2e3f93b690f4..4fb985b76e97 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -44,6 +44,8 @@ config ARM select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF select ARCH_USE_MEMTEST + # https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de + select ARCH_USES_CFI_GENERIC_LLVM_PASS if CLANG_VERSION < 220000 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_IPC_PARSE_VERSION From c57f5fee54dfc83ee1d7f70f7beb9410b8466e9e Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 25 Oct 2025 21:53:20 +0100 Subject: [PATCH 0568/1075] libeth: xdp: Disable generic kCFI pass for libeth_xdp_tx_xmit_bulk() When building drivers/net/ethernet/intel/idpf/xsk.c for ARCH=arm with CONFIG_CFI=y using a version of LLVM prior to 22.0.0, there is a BUILD_BUG_ON failure: $ cat arch/arm/configs/repro.config CONFIG_BPF_SYSCALL=y CONFIG_CFI=y CONFIG_IDPF=y CONFIG_XDP_SOCKETS=y $ make -skj"$(nproc)" ARCH=arm LLVM=1 clean defconfig repro.config drivers/net/ethernet/intel/idpf/xsk.o In file included from drivers/net/ethernet/intel/idpf/xsk.c:4: include/net/libeth/xsk.h:205:2: error: call to '__compiletime_assert_728' declared with 'error' attribute: BUILD_BUG_ON failed: !__builtin_constant_p(tmo == libeth_xsktmo) 205 | BUILD_BUG_ON(!__builtin_constant_p(tmo == libeth_xsktmo)); | ^ ... libeth_xdp_tx_xmit_bulk() indirectly calls libeth_xsk_xmit_fill_buf() but these functions are marked as __always_inline so that the compiler can turn these indirect calls into direct ones and see that the tmo parameter to __libeth_xsk_xmit_fill_buf_md() is ultimately libeth_xsktmo from idpf_xsk_xmit(). Unfortunately, the generic kCFI pass in LLVM expands the kCFI bundles from the indirect calls in libeth_xdp_tx_xmit_bulk() in such a way that later optimizations cannot turn these calls into direct ones, making the BUILD_BUG_ON fail because it cannot be proved at compile time that tmo is libeth_xsktmo. Disable the generic kCFI pass for libeth_xdp_tx_xmit_bulk() to ensure these indirect calls can always be turned into direct calls to avoid this error. Closes: https://github.com/ClangBuiltLinux/linux/issues/2124 Fixes: 9705d6552f58 ("idpf: implement Rx path for AF_XDP") Signed-off-by: Nathan Chancellor Reviewed-by: Aleksandr Loktionov Acked-by: Alexander Lobakin Link: https://patch.msgid.link/20251025-idpf-fix-arm-kcfi-build-error-v1-3-ec57221153ae@kernel.org Signed-off-by: Kees Cook --- include/net/libeth/xdp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index bc3507edd589..898723ab62e8 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -513,7 +513,7 @@ struct libeth_xdp_tx_desc { * can't fail, but can send less frames if there's no enough free descriptors * available. The actual free space is returned by @prep from the driver. */ -static __always_inline u32 +static __always_inline __nocfi_generic u32 libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, u32 n, bool unroll, u64 priv, u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq), From f838d624fd1183e07db86f3138bcd05fd7630a1e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 28 Oct 2025 15:24:24 -0700 Subject: [PATCH 0569/1075] scsi: ufs: core: Revert "Make HID attributes visible" Patch "Make HID attributes visible" is needed for older kernel versions (e.g. 6.12) where ufs_get_device_desc() is called from ufshcd_probe_hba(). In these older kernel versions ufshcd_get_device_desc() may be called after the sysfs attributes have been added. In the upstream kernel however ufshcd_get_device_desc() is called before ufs_sysfs_add_nodes(). See also the ufshcd_device_params_init() call from ufshcd_init(). Hence, calling sysfs_update_group() is not necessary. See also commit 69f5eb78d4b0 ("scsi: ufs: core: Move the ufshcd_device_init(hba, true) call") in kernel v6.13. This patch fixes the following kernel warning: sysfs: cannot create duplicate filename '/devices/platform/3c2d0000.ufs/hid' Workqueue: async async_run_entry_fn Call trace: dump_backtrace+0xfc/0x17c show_stack+0x18/0x28 dump_stack_lvl+0x40/0x104 dump_stack+0x18/0x3c sysfs_warn_dup+0x6c/0xc8 internal_create_group+0x1c8/0x504 sysfs_create_groups+0x38/0x9c ufs_sysfs_add_nodes+0x20/0x58 ufshcd_init+0x1114/0x134c ufshcd_pltfrm_init+0x728/0x7d8 ufs_google_probe+0x30/0x84 platform_probe+0xa0/0xe0 really_probe+0x114/0x454 __driver_probe_device+0xa4/0x160 driver_probe_device+0x44/0x23c __device_attach_driver+0x15c/0x1f4 bus_for_each_drv+0x10c/0x168 __device_attach_async_helper+0x80/0xf8 async_run_entry_fn+0x4c/0x17c process_one_work+0x26c/0x65c worker_thread+0x33c/0x498 kthread+0x110/0x134 ret_from_fork+0x10/0x20 ufshcd 3c2d0000.ufs: ufs_sysfs_add_nodes: sysfs groups creation failed (err = -17) Cc: Daniel Lee Cc: Peter Wang Cc: Bjorn Andersson Cc: Neil Armstrong Fixes: bb7663dec67b ("scsi: ufs: sysfs: Make HID attributes visible") Signed-off-by: Bart Van Assche Fixes: bb7663dec67b ("scsi: ufs: sysfs: Make HID attributes visible") Acked-by: Neil Armstrong Reviewed-by: Peter Wang Reviewed-by: Bjorn Andersson Link: https://patch.msgid.link/20251028222433.1108299-1-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-sysfs.c | 2 +- drivers/ufs/core/ufs-sysfs.h | 1 - drivers/ufs/core/ufshcd.c | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c index c040afc6668e..0086816b27cd 100644 --- a/drivers/ufs/core/ufs-sysfs.c +++ b/drivers/ufs/core/ufs-sysfs.c @@ -1949,7 +1949,7 @@ static umode_t ufs_sysfs_hid_is_visible(struct kobject *kobj, return hba->dev_info.hid_sup ? attr->mode : 0; } -const struct attribute_group ufs_sysfs_hid_group = { +static const struct attribute_group ufs_sysfs_hid_group = { .name = "hid", .attrs = ufs_sysfs_hid, .is_visible = ufs_sysfs_hid_is_visible, diff --git a/drivers/ufs/core/ufs-sysfs.h b/drivers/ufs/core/ufs-sysfs.h index 6efb82a082fd..8d94af3b8077 100644 --- a/drivers/ufs/core/ufs-sysfs.h +++ b/drivers/ufs/core/ufs-sysfs.h @@ -14,6 +14,5 @@ void ufs_sysfs_remove_nodes(struct device *dev); extern const struct attribute_group ufs_sysfs_unit_descriptor_group; extern const struct attribute_group ufs_sysfs_lun_attributes_group; -extern const struct attribute_group ufs_sysfs_hid_group; #endif diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 5d6297aa5c28..2b76f543d072 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8499,8 +8499,6 @@ static int ufs_get_device_desc(struct ufs_hba *hba) DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP) & UFS_DEV_HID_SUPPORT; - sysfs_update_group(&hba->dev->kobj, &ufs_sysfs_hid_group); - model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, From bb44826c3bdbf1fa3957008a04908f45e5666463 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Oct 2025 11:59:15 +0300 Subject: [PATCH 0570/1075] scsi: ufs: ufs-pci: Fix S0ix/S3 for Intel controllers Intel platforms with UFS, can support Suspend-to-Idle (S0ix) and Suspend-to-RAM (S3). For S0ix the link state should be HIBERNATE. For S3, state is lost, so the link state must be OFF. Driver policy, expressed by spm_lvl, can be 3 (link HIBERNATE, device SLEEP) for S0ix but must be changed to 5 (link OFF, device POWEROFF) for S3. Fix support for S0ix/S3 by switching spm_lvl as needed. During suspend ->prepare(), if the suspend target state is not Suspend-to-Idle, ensure the spm_lvl is at least 5 to ensure that resume will be possible from deep sleep states. During suspend ->complete(), restore the spm_lvl to its original value that is suitable for S0ix. This fix is first needed in Intel Alder Lake based controllers. Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251024085918.31825-2-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 67 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index b87e03777395..89f88b693850 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ struct intel_host { u32 dsm_fns; u32 active_ltr; u32 idle_ltr; + int saved_spm_lvl; struct dentry *debugfs_root; struct gpio_desc *reset_gpio; }; @@ -347,6 +349,7 @@ static int ufs_intel_common_init(struct ufs_hba *hba) host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; + host->saved_spm_lvl = -1; ufshcd_set_variant(hba, host); intel_dsm_init(host, hba->dev); if (INTEL_DSM_SUPPORTED(host, RESET)) { @@ -538,6 +541,66 @@ static int ufshcd_pci_restore(struct device *dev) return ufshcd_system_resume(dev); } + +static int ufs_intel_suspend_prepare(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct intel_host *host = ufshcd_get_variant(hba); + int err; + + /* + * Only s2idle (S0ix) retains link state. Force power-off + * (UFS_PM_LVL_5) for any other case. + */ + if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && hba->spm_lvl < UFS_PM_LVL_5) { + host->saved_spm_lvl = hba->spm_lvl; + hba->spm_lvl = UFS_PM_LVL_5; + } + + err = ufshcd_suspend_prepare(dev); + + if (err < 0 && host->saved_spm_lvl != -1) { + hba->spm_lvl = host->saved_spm_lvl; + host->saved_spm_lvl = -1; + } + + return err; +} + +static void ufs_intel_resume_complete(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + struct intel_host *host = ufshcd_get_variant(hba); + + ufshcd_resume_complete(dev); + + if (host->saved_spm_lvl != -1) { + hba->spm_lvl = host->saved_spm_lvl; + host->saved_spm_lvl = -1; + } +} + +static int ufshcd_pci_suspend_prepare(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (!strcmp(hba->vops->name, "intel-pci")) + return ufs_intel_suspend_prepare(dev); + + return ufshcd_suspend_prepare(dev); +} + +static void ufshcd_pci_resume_complete(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (!strcmp(hba->vops->name, "intel-pci")) { + ufs_intel_resume_complete(dev); + return; + } + + ufshcd_resume_complete(dev); +} #endif /** @@ -611,8 +674,8 @@ static const struct dev_pm_ops ufshcd_pci_pm_ops = { .thaw = ufshcd_system_resume, .poweroff = ufshcd_system_suspend, .restore = ufshcd_pci_restore, - .prepare = ufshcd_suspend_prepare, - .complete = ufshcd_resume_complete, + .prepare = ufshcd_pci_suspend_prepare, + .complete = ufshcd_pci_resume_complete, #endif }; From d34caa89a132cd69efc48361d4772251546fdb88 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Oct 2025 11:59:16 +0300 Subject: [PATCH 0571/1075] scsi: ufs: core: Add a quirk to suppress link_startup_again ufshcd_link_startup() has a facility (link_startup_again) to issue DME_LINKSTARTUP a 2nd time even though the 1st time was successful. Some older hardware benefits from that, however the behaviour is non-standard, and has been found to cause link startup to be unreliable for some Intel Alder Lake based host controllers. Add UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE to suppress link_startup_again, in preparation for setting the quirk for affected controllers. Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251024085918.31825-3-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 3 ++- include/ufs/ufshcd.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 2b76f543d072..453a99ec6282 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5066,7 +5066,8 @@ static int ufshcd_link_startup(struct ufs_hba *hba) * If UFS device isn't active then we will have to issue link startup * 2 times to make sure the device state move to active. */ - if (!ufshcd_is_ufs_dev_active(hba)) + if (!(hba->quirks & UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE) && + !ufshcd_is_ufs_dev_active(hba)) link_startup_again = true; link_startup: diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 9425cfd9d00e..0f95576bf1f6 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -688,6 +688,13 @@ enum ufshcd_quirks { * single doorbell mode. */ UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, + + /* + * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd + * time (refer link_startup_again) after the 1st time was successful, + * because it causes link startup to become unreliable. + */ + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26, }; enum ufshcd_caps { From d968e99488c4b08259a324a89e4ed17bf36561a4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Oct 2025 11:59:17 +0300 Subject: [PATCH 0572/1075] scsi: ufs: ufs-pci: Set UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE for Intel ADL Link startup becomes unreliable for Intel Alder Lake based host controllers when a 2nd DME_LINKSTARTUP is issued unnecessarily. Employ UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE to suppress that from happening. Fixes: 7dc9fb47bc9a ("scsi: ufs: ufs-pci: Add support for Intel ADL") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251024085918.31825-4-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/host/ufshcd-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 89f88b693850..5f65dfad1a71 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -428,7 +428,8 @@ static int ufs_intel_lkf_init(struct ufs_hba *hba) static int ufs_intel_adl_init(struct ufs_hba *hba) { hba->nop_out_timeout = 200; - hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8 | + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE; hba->caps |= UFSHCD_CAP_WB_EN; return ufs_intel_common_init(hba); } From a2b32bc1d9e359a9f90d0de6af16699facb10935 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 24 Oct 2025 11:59:18 +0300 Subject: [PATCH 0573/1075] scsi: ufs: core: Fix invalid probe error return value After DME Link Startup, the error return value is set to the MIPI UniPro GenericErrorCode which can be 0 (SUCCESS) or 1 (FAILURE). Upon failure during driver probe, the error code 1 is propagated back to the driver probe function which must return a negative value to indicate an error, but 1 is not negative, so the probe is considered to be successful even though it failed. Subsequently, removing the driver results in an oops because it is not in a valid state. This happens because none of the callers of ufshcd_init() expect a non-negative error code. Fix the return value and documentation to match actual usage. Fixes: 69f5eb78d4b0 ("scsi: ufs: core: Move the ufshcd_device_init(hba, true) call") Cc: stable@vger.kernel.org Signed-off-by: Adrian Hunter Reviewed-by: Bart Van Assche Link: https://patch.msgid.link/20251024085918.31825-5-adrian.hunter@intel.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 453a99ec6282..d6a060a72461 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10656,7 +10656,7 @@ static int ufshcd_add_scsi_host(struct ufs_hba *hba) * @mmio_base: base register address * @irq: Interrupt line of device * - * Return: 0 on success, non-zero value on failure. + * Return: 0 on success; < 0 on failure. */ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) { @@ -10897,7 +10897,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->is_irq_enabled = false; ufshcd_hba_exit(hba); out_error: - return err; + return err > 0 ? -EIO : err; } EXPORT_SYMBOL_GPL(ufshcd_init); From 4f4c654f5a0f6560b938a5ab05ec5940aa6c1c29 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Wed, 29 Oct 2025 15:11:52 -0300 Subject: [PATCH 0574/1075] ALSA: hda/realtek: Enable mic on Vaio RPL Vaio RPL is equipped with ACL256, and needs a fix to make the internal mic and headphone mic to work. Also must to limits the internal microphone boost. Signed-off-by: Edson Juliano Drosdeck Link: https://patch.msgid.link/20251029181152.389302-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index e448c0c21b57..4aec5067c59d 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -3736,6 +3736,7 @@ enum { ALC285_FIXUP_ASUS_GA605K_I2C_SPEAKER2_TO_DAC1, ALC269_FIXUP_POSITIVO_P15X_HEADSET_MIC, ALC289_FIXUP_ASUS_ZEPHYRUS_DUAL_SPK, + ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -6172,6 +6173,16 @@ static const struct hda_fixup alc269_fixups[] = { { 0x1e, 0x90170150 }, /* Internal Speaker */ { } }, + }, + [ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a1113c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x22a190a0 }, /* dock mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_LIMIT_INT_MIC_BOOST } }; @@ -6960,6 +6971,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x971d, "Clevo N970T[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa500, "Clevo NL5[03]RU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa554, "VAIO VJFH52", ALC269_FIXUP_VAIO_VJFH52_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0xa559, "VAIO RPL", ALC256_FIXUP_VAIO_RPL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa600, "Clevo NL50NU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa650, "Clevo NP[567]0SN[CD]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0xa671, "Clevo NP70SN[CDE]", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), From 70e8335485966d7d4ed85976dceab52803b151a2 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:13:39 +0530 Subject: [PATCH 0575/1075] wifi: zd1211rw: fix potential memory leak in __zd_usb_enable_rx() The memory allocated for urbs with kcalloc() is not freed on any error path. Fix that by freeing it in the error path. Fixes: e85d0918b54f ("[PATCH] ZyDAS ZD1211 USB-WLAN driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028174341.139134-1-nihaal@cse.iitm.ac.in Signed-off-by: Johannes Berg --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index 2faa0de2a36e..8ee15a15f4ca 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -791,6 +791,7 @@ static int __zd_usb_enable_rx(struct zd_usb *usb) if (urbs) { for (i = 0; i < RX_URBS_COUNT; i++) free_rx_urb(urbs[i]); + kfree(urbs); } return r; } From 51e5ad549c43b557c7da1e4d1a1dcf061b4a5f6c Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 26 Oct 2025 22:03:12 +0530 Subject: [PATCH 0576/1075] net: sctp: fix KMSAN uninit-value in sctp_inq_pop Fix an issue detected by syzbot: KMSAN reported an uninitialized-value access in sctp_inq_pop BUG: KMSAN: uninit-value in sctp_inq_pop The issue is actually caused by skb trimming via sk_filter() in sctp_rcv(). In the reproducer, skb->len becomes 1 after sk_filter(), which bypassed the original check: if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + skb_transport_offset(skb)) To handle this safely, a new check should be performed after sk_filter(). Reported-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Tested-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d101e12bccd4095460e7 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Xin Long Signed-off-by: Ranganath V N Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251026-kmsan_fix-v3-1-2634a409fa5f@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 7e99894778d4..e119e460ccde 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -190,7 +190,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset_ct(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb) || skb->len < sizeof(struct sctp_chunkhdr)) goto discard_release; /* Create an SCTP packet structure. */ From 847ebc4476714f81d7dea73e5ea69448d7fe9d3a Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 29 Oct 2025 12:34:31 +0100 Subject: [PATCH 0577/1075] x86/CPU/AMD: Extend Zen6 model range Add some more Zen6 models. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Link: https://patch.msgid.link/20251029123056.19987-1-bp@kernel.org --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bc29be670a2a..8e36964a7721 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -516,7 +516,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ZEN5); break; case 0x50 ... 0x5f: - case 0x90 ... 0xaf: + case 0x80 ... 0xaf: case 0xc0 ... 0xcf: setup_force_cpu_cap(X86_FEATURE_ZEN6); break; From 89216c9051ef6635f1514f8e0d2f9cd63b37a3b6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 Oct 2025 10:29:48 -0700 Subject: [PATCH 0578/1075] x86/cpu: Add/fix core comments for {Panther,Nova} Lake The E-core in Panther Lake is Darkmont, not Crestmont. Nova Lake is built from Coyote Cove (P-core) and Arctic Wolf (E-core). Fixes: 43bb700cff6b ("x86/cpu: Update Intel Family comments") Signed-off-by: Tony Luck Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: Peter Zijlstra Link: https://patch.msgid.link/20251028172948.6721-1-tony.luck@intel.com --- arch/x86/include/asm/intel-family.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index f32a0eca2ae5..950bfd006905 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -150,12 +150,12 @@ #define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */ -#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */ +#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Darkmont */ #define INTEL_WILDCATLAKE_L IFM(6, 0xD5) -#define INTEL_NOVALAKE IFM(18, 0x01) -#define INTEL_NOVALAKE_L IFM(18, 0x03) +#define INTEL_NOVALAKE IFM(18, 0x01) /* Coyote Cove / Arctic Wolf */ +#define INTEL_NOVALAKE_L IFM(18, 0x03) /* Coyote Cove / Arctic Wolf */ /* "Small Core" Processors (Atom/E-Core) */ From 47a0925ee4bd2689f0aef4dbd67dd46442fe1ca2 Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Thu, 30 Oct 2025 07:35:38 +0100 Subject: [PATCH 0579/1075] regulator: bd718x7: Fix voltages scaled by resistor divider The .min_sel and .max_sel fields remained uninitialized in the new linear_range, causing an error further down the line. Copy the old values of these fields to the new one as they represent the range of register values, which does not change. Fixes: d2ad981151b3a ("regulator: bd718x7: Support external connection to scale voltages") Signed-off-by: Maud Spierings Reviewed-by: Matti Vaittinen Link: https://patch.msgid.link/20251030-mini_iv-v3-2-ef56c4d9f219@gocontroll.com Signed-off-by: Mark Brown --- drivers/regulator/bd718x7-regulator.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c index 022d98f3c32a..ea9c4058ee6a 100644 --- a/drivers/regulator/bd718x7-regulator.c +++ b/drivers/regulator/bd718x7-regulator.c @@ -1613,6 +1613,8 @@ static int setup_feedback_loop(struct device *dev, struct device_node *np, step /= r1; new[j].min = min; + new[j].min_sel = desc->linear_ranges[j].min_sel; + new[j].max_sel = desc->linear_ranges[j].max_sel; new[j].step = step; dev_dbg(dev, "%s: old range min %d, step %d\n", From 0b39ca457241aeca07a613002512573e8804f93a Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 30 Oct 2025 04:39:18 +0000 Subject: [PATCH 0580/1075] blk-crypto: use BLK_STS_INVAL for alignment errors Make __blk_crypto_bio_prep() propagate BLK_STS_INVAL when IO segments fail the data unit alignment check. This was flagged by an LTP test that expects EINVAL when performing an O_DIRECT read with a misaligned buffer [1]. Cc: Eric Biggers Cc: Christoph Hellwig Link: https://lore.kernel.org/all/aP-c5gPjrpsn0vJA@google.com/ [1] Signed-off-by: Carlos Llamas Reviewed-by: Eric Biggers Signed-off-by: Jens Axboe --- block/blk-crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-crypto.c b/block/blk-crypto.c index 4b1ad84d1b5a..3e7bf1974cbd 100644 --- a/block/blk-crypto.c +++ b/block/blk-crypto.c @@ -292,7 +292,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr) } if (!bio_crypt_check_alignment(bio)) { - bio->bi_status = BLK_STS_IOERR; + bio->bi_status = BLK_STS_INVAL; goto fail; } From 3257bd193fa1702791978d8418c9a96e3def082c Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 14 Oct 2025 13:30:51 +0200 Subject: [PATCH 0581/1075] drm/imx: parallel-display: convert to devm_drm_bridge_alloc() API This is the new API for allocating DRM bridges. This conversion was missed during the initial conversion of all bridges to the new API. Thus all kernels with commit 94d50c1a2ca3 ("drm/bridge: get/put the bridge reference in drm_bridge_attach/detach()") and using this driver now warn due to drm_bridge_attach() incrementing the refcount, which is not initialized without using devm_drm_bridge_alloc() for allocation. To make the conversion simple and straightforward without messing up with the drmm_simple_encoder_alloc(), move the struct drm_bridge from struct imx_parallel_display_encoder to struct imx_parallel_display. Also remove the 'struct imx_parallel_display *pd' from struct imx_parallel_display_encoder, not needed anymore. Fixes: 94d50c1a2ca3 ("drm/bridge: get/put the bridge reference in drm_bridge_attach/detach()") Reported-by: Ernest Van Hoecke Closes: https://lore.kernel.org/all/hlf4wdopapxnh4rekl5s3kvoi6egaga3lrjfbx6r223ar3txri@3ik53xw5idyh/ Signed-off-by: Luca Ceresoli Reviewed-by: Louis Chauvet Tested-by: Ernest Van Hoecke Link: https://patch.msgid.link/20251014-drm-bridge-alloc-imx-ipuv3-v1-1-a1bb1dcbff50@bootlin.com Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3/parallel-display.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index 6d8325c76697..3d0de9c6e925 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c @@ -25,19 +25,18 @@ struct imx_parallel_display_encoder { struct drm_encoder encoder; - struct drm_bridge bridge; - struct imx_parallel_display *pd; }; struct imx_parallel_display { struct device *dev; u32 bus_format; struct drm_bridge *next_bridge; + struct drm_bridge bridge; }; static inline struct imx_parallel_display *bridge_to_imxpd(struct drm_bridge *b) { - return container_of(b, struct imx_parallel_display_encoder, bridge)->pd; + return container_of(b, struct imx_parallel_display, bridge); } static const u32 imx_pd_bus_fmts[] = { @@ -195,15 +194,13 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data) if (IS_ERR(imxpd_encoder)) return PTR_ERR(imxpd_encoder); - imxpd_encoder->pd = imxpd; encoder = &imxpd_encoder->encoder; - bridge = &imxpd_encoder->bridge; + bridge = &imxpd->bridge; ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node); if (ret) return ret; - bridge->funcs = &imx_pd_bridge_funcs; drm_bridge_attach(encoder, bridge, NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR); connector = drm_bridge_connector_init(drm, encoder); @@ -228,9 +225,10 @@ static int imx_pd_probe(struct platform_device *pdev) u32 bus_format = 0; const char *fmt; - imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL); - if (!imxpd) - return -ENOMEM; + imxpd = devm_drm_bridge_alloc(dev, struct imx_parallel_display, bridge, + &imx_pd_bridge_funcs); + if (IS_ERR(imxpd)) + return PTR_ERR(imxpd); /* port@1 is the output port */ imxpd->next_bridge = devm_drm_of_get_bridge(dev, np, 1, 0); From bf7e97910b9f4d9679e075a39be371a4ed65dbd4 Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Tue, 14 Oct 2025 13:30:52 +0200 Subject: [PATCH 0582/1075] drm/imx: parallel-display: add the bridge before attaching it Invoking drm_bridge_add() is good practice, so add it to this driver. Link: https://lore.kernel.org/all/DDHZ5GO9MPF0.CGYTVBI74FOZ@bootlin.com Signed-off-by: Luca Ceresoli Reviewed-by: Louis Chauvet Link: https://patch.msgid.link/20251014-drm-bridge-alloc-imx-ipuv3-v1-2-a1bb1dcbff50@bootlin.com Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3/parallel-display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/imx/ipuv3/parallel-display.c b/drivers/gpu/drm/imx/ipuv3/parallel-display.c index 3d0de9c6e925..7fc6af703307 100644 --- a/drivers/gpu/drm/imx/ipuv3/parallel-display.c +++ b/drivers/gpu/drm/imx/ipuv3/parallel-display.c @@ -256,6 +256,8 @@ static int imx_pd_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxpd); + devm_drm_bridge_add(dev, &imxpd->bridge); + return component_add(dev, &imx_pd_ops); } From 64e2f60f355e556337fcffe80b9bcff1b22c9c42 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 30 Oct 2025 15:55:05 +0100 Subject: [PATCH 0583/1075] s390: Disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP As reported by Luiz Capitulino enabling HVO on s390 leads to reproducible crashes. The problem is that kernel page tables are modified without flushing corresponding TLB entries. Even if it looks like the empty flush_tlb_all() implementation on s390 is the problem, it is actually a different problem: on s390 it is not allowed to replace an active/valid page table entry with another valid page table entry without the detour over an invalid entry. A direct replacement may lead to random crashes and/or data corruption. In order to invalidate an entry special instructions have to be used (e.g. ipte or idte). Alternatively there are also special instructions available which allow to replace a valid entry with a different valid entry (e.g. crdte or cspg). Given that the HVO code currently does not provide the hooks to allow for an implementation which is compliant with the s390 architecture requirements, disable ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP again, which is basically a revert of the original patch which enabled it. Reported-by: Luiz Capitulino Closes: https://lore.kernel.org/all/20251028153930.37107-1-luizcap@redhat.com/ Fixes: 00a34d5a99c0 ("s390: select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP") Cc: stable@vger.kernel.org Tested-by: Luiz Capitulino Reviewed-by: Gerald Schaefer Reviewed-by: David Hildenbrand Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c4145672ca34..df22b10d9141 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -158,7 +158,6 @@ config S390 select ARCH_WANT_IRQS_OFF_ACTIVATE_MM select ARCH_WANT_KERNEL_PMD_MKWRITE select ARCH_WANT_LD_ORPHAN_WARN - select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP select ARCH_WANTS_THP_SWAP select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 From da888524c393b4a14727e1a821bdd51313d0a2d3 Mon Sep 17 00:00:00 2001 From: Sascha Bischoff Date: Tue, 21 Oct 2025 09:44:09 +0000 Subject: [PATCH 0584/1075] KVM: arm64: vgic-v3: Trap all if no in-kernel irqchip If there is no in-kernel irqchip for a GICv3 host set all of the trap bits to block all accesses. This fixes the no-vgic-v3 selftest again. Fixes: 3193287ddffb ("KVM: arm64: gic-v3: Only set ICH_HCR traps for v2-on-v3 or v3 guests") Reported-by: Mark Brown Closes: https://lore.kernel.org/all/23072856-6b8c-41e2-93d1-ea8a240a7079@sirena.org.uk Signed-off-by: Sascha Bischoff Reviewed-by: Sebastian Ott Tested-by: Mark Brown Link: https://patch.msgid.link/20251021094358.1963807-1-sascha.bischoff@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-v3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 6fbb4b099855..2f75ef14d339 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -301,7 +301,8 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) return; /* Hide GICv3 sysreg if necessary */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || + !irqchip_in_kernel(vcpu->kvm)) { vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | ICH_HCR_EL2_TC); return; From a24f7afce048e724be072bd063ed864f124daf81 Mon Sep 17 00:00:00 2001 From: Maximilian Dittgen Date: Mon, 20 Oct 2025 16:59:46 +0200 Subject: [PATCH 0585/1075] KVM: selftests: fix MAPC RDbase target formatting in vgic_lpi_stress Since GITS_TYPER.PTA == 0, the ITS MAPC command demands a CPU ID, rather than a physical redistributor address, for its RDbase command argument. As such, when MAPC-ing guest ITS collections, vgic_lpi_stress iterates over CPU IDs in the range [0, nr_cpus), passing them as the RDbase vcpu_id argument to its_send_mapc_cmd(). However, its_encode_target() in the its_send_mapc_cmd() selftest handler expects RDbase arguments to be formatted with a 16 bit offset, as shown by the 16-bit target_addr right shift its implementation: its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16) At the moment, all CPU IDs passed into its_send_mapc_cmd() have no offset, therefore becoming 0x0 after the bit shift. Thus, when vgic_its_cmd_handle_mapc() receives the ITS command in vgic-its.c, it always interprets the RDbase target CPU as CPU 0. All interrupts sent to collections will be processed by vCPU 0, which defeats the purpose of this multi-vCPU test. Fix by creating procnum_to_rdbase() helper function, which left-shifts the vCPU parameter received by its_send_mapc_cmd 16 bits before passing it to its_encode_target for encoding. Signed-off-by: Maximilian Dittgen Link: https://patch.msgid.link/20251020145946.48288-1-mdittgen@amazon.de Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..0e2f8ed90f30 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); From 92e781c93ebe75e39ecdf78fb8ef1fdf1b63a9f8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Oct 2025 22:19:29 +0100 Subject: [PATCH 0586/1075] KVM: arm64: selftests: Add SCTLR2_EL2 to get-reg-list We recently added support for SCTLR2_EL2 to the kernel but did not add it to get-reg-list, resulting in it reporting the missing register when it is available. Add it. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20251023-b4-kvm-arm64-get-reg-list-sctlr-el2-v1-1-088f88ff992a@kernel.org Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index c9b84eeaab6b..2abef0a86d46 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -63,6 +63,7 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), @@ -718,6 +719,7 @@ static __u64 el2_regs[] = { SYS_REG(VMPIDR_EL2), SYS_REG(SCTLR_EL2), SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), SYS_REG(HCR_EL2), SYS_REG(MDCR_EL2), SYS_REG(CPTR_EL2), From a186fbcfd845699d51809f7c7e54cf997fe32820 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Oct 2025 00:43:39 +0100 Subject: [PATCH 0587/1075] KVM: arm64: selftests: Filter ZCR_EL2 in get-reg-list get-reg-list includes ZCR_EL2 in the list of EL2 registers that it looks for when NV is enabled but does not have any feature gate for this register, meaning that testing any combination of features that includes EL2 but does not include SVE will result in a test failure due to a missing register being reported: | The following lines are missing registers: | | ARM64_SYS_REG(3, 4, 1, 2, 0), Add ZCR_EL2 to feat_id_regs so that the test knows not to expect to see it without SVE being enabled. Fixes: 3a90b6f27964 ("KVM: arm64: selftests: get-reg-list: Add base EL2 registers") Signed-off-by: Mark Brown Link: https://patch.msgid.link/20251024-kvm-arm64-get-reg-list-zcr-el2-v1-1-0cd0ff75e22f@kernel.org Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 2abef0a86d46..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -69,6 +69,7 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) From f71f7afd0a0cd3f044cd2f8aba71a1a7229df762 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Thu, 16 Oct 2025 17:45:41 +0100 Subject: [PATCH 0588/1075] KVM: arm64: Check range args for pKVM mem transitions There's currently no verification for host issued ranges in most of the pKVM memory transitions. The end boundary might therefore be subject to overflow and later checks could be evaded. Close this loophole with an additional pfn_range_is_valid() check on a per public function basis. Once this check has passed, it is safe to convert pfn and nr_pages into a phys_addr_t and a size. host_unshare_guest transition is already protected via __check_host_shared_guest(), while assert_host_shared_guest() callers are already ignoring host checks. Signed-off-by: Vincent Donnefort Link: https://patch.msgid.link/20251016164541.3771235-1-vdonnefort@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 28 +++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index ddc8beb55eee..49db32f3ddf7 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -367,6 +367,19 @@ static int host_stage2_unmap_dev_all(void) return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); } +/* + * Ensure the PFN range is contained within PA-range. + * + * This check is also robust to overflows and is therefore a requirement before + * using a pfn/nr_pages pair from an untrusted source. + */ +static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) +{ + u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); + + return pfn < limit && ((limit - pfn) >= nr_pages); +} + struct kvm_mem_range { u64 start; u64 end; @@ -776,6 +789,9 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) void *virt = __hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -804,6 +820,9 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) u64 virt = (u64)__hyp_va(phys); int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); hyp_lock_component(); @@ -887,6 +906,9 @@ int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); if (!ret) @@ -902,6 +924,9 @@ int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) u64 size = PAGE_SIZE * nr_pages; int ret; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + host_lock_component(); ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); if (!ret) @@ -945,6 +970,9 @@ int __pkvm_host_share_guest(u64 pfn, u64 gfn, u64 nr_pages, struct pkvm_hyp_vcpu if (prot & ~KVM_PGTABLE_PROT_RWX) return -EINVAL; + if (!pfn_range_is_valid(pfn, nr_pages)) + return -EINVAL; + ret = __guest_check_transition_size(phys, ipa, nr_pages, &size); if (ret) return ret; From 103e17aac09cdd358133f9e00998b75d6c1f1518 Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Fri, 17 Oct 2025 07:57:10 +0000 Subject: [PATCH 0589/1075] KVM: arm64: Check the untrusted offset in FF-A memory share Verify the offset to prevent OOB access in the hypervisor FF-A buffer in case an untrusted large enough value [U32_MAX - sizeof(struct ffa_composite_mem_region) + 1, U32_MAX] is set from the host kernel. Signed-off-by: Sebastian Ene Acked-by: Will Deacon Link: https://patch.msgid.link/20251017075710.2605118-1-sebastianene@google.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/ffa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 4e16f9b96f63..58b7d0c477d7 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -479,7 +479,7 @@ static void __do_ffa_mem_xfer(const u64 func_id, struct ffa_mem_region_attributes *ep_mem_access; struct ffa_composite_mem_region *reg; struct ffa_mem_region *buf; - u32 offset, nr_ranges; + u32 offset, nr_ranges, checked_offset; int ret = 0; if (addr_mbz || npages_mbz || fraglen > len || @@ -516,7 +516,12 @@ static void __do_ffa_mem_xfer(const u64 func_id, goto out_unlock; } - if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { + if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + if (fraglen < checked_offset) { ret = FFA_RET_INVALID_PARAMETERS; goto out_unlock; } From 2618849f31e7cf51fadd4a5242458501a6d5b315 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 23 Oct 2025 19:44:04 +1030 Subject: [PATCH 0590/1075] btrfs: ensure no dirty metadata is written back for an fs with errors [BUG] During development of a minor feature (make sure all btrfs_bio::end_io() is called in task context), I noticed a crash in generic/388, where metadata writes triggered new works after btrfs_stop_all_workers(). It turns out that it can even happen without any code modification, just using RAID5 for metadata and the same workload from generic/388 is going to trigger the use-after-free. [CAUSE] If btrfs hits an error, the fs is marked as error, no new transaction is allowed thus metadata is in a frozen state. But there are some metadata modifications before that error, and they are still in the btree inode page cache. Since there will be no real transaction commit, all those dirty folios are just kept as is in the page cache, and they can not be invalidated by invalidate_inode_pages2() call inside close_ctree(), because they are dirty. And finally after btrfs_stop_all_workers(), we call iput() on btree inode, which triggers writeback of those dirty metadata. And if the fs is using RAID56 metadata, this will trigger RMW and queue new works into rmw_workers, which is already stopped, causing warning from queue_work() and use-after-free. [FIX] Add a special handling for write_one_eb(), that if the fs is already in an error state, immediately mark the bbio as failure, instead of really submitting them. Then during close_ctree(), iput() will just discard all those dirty tree blocks without really writing them back, thus no more new jobs for already stopped-and-freed workqueues. The extra discard in write_one_eb() also acts as an extra safenet. E.g. the transaction abort is triggered by some extent/free space tree corruptions, and since extent/free space tree is already corrupted some tree blocks may be allocated where they shouldn't be (overwriting existing tree blocks). In that case writing them back will further corrupting the fs. CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Filipe Manana Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 755ec6dfd51c..23273d0e6f22 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2228,6 +2228,14 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb, wbc_account_cgroup_owner(wbc, folio, range_len); folio_unlock(folio); } + /* + * If the fs is already in error status, do not submit any writeback + * but immediately finish it. + */ + if (unlikely(BTRFS_FS_ERROR(fs_info))) { + btrfs_bio_end_io(bbio, errno_to_blk_status(BTRFS_FS_ERROR(fs_info))); + return; + } btrfs_submit_bbio(bbio, 0); } From f260c6aff0b8af236084012d14f9f1bf792ea883 Mon Sep 17 00:00:00 2001 From: Shardul Bankar Date: Sun, 26 Oct 2025 01:30:21 +0530 Subject: [PATCH 0591/1075] btrfs: fix memory leak of qgroup_list in btrfs_add_qgroup_relation When btrfs_add_qgroup_relation() is called with invalid qgroup levels (src >= dst), the function returns -EINVAL directly without freeing the preallocated qgroup_list structure passed by the caller. This causes a memory leak because the caller unconditionally sets the pointer to NULL after the call, preventing any cleanup. The issue occurs because the level validation check happens before the mutex is acquired and before any error handling path that would free the prealloc pointer. On this early return, the cleanup code at the 'out' label (which includes kfree(prealloc)) is never reached. In btrfs_ioctl_qgroup_assign(), the code pattern is: prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc); prealloc = NULL; // Always set to NULL regardless of return value ... kfree(prealloc); // This becomes kfree(NULL), does nothing When the level check fails, 'prealloc' is never freed by either the callee or the caller, resulting in a 64-byte memory leak per failed operation. This can be triggered repeatedly by an unprivileged user with access to a writable btrfs mount, potentially exhausting kernel memory. Fix this by freeing prealloc before the early return, ensuring prealloc is always freed on all error paths. Fixes: 4addc1ffd67a ("btrfs: qgroup: preallocate memory before adding a relation") Reviewed-by: Qu Wenruo Signed-off-by: Shardul Bankar Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1175b8192cd7..31ad8580322a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1539,8 +1539,10 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst ASSERT(prealloc); /* Check the level of src and dst first */ - if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) + if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst)) { + kfree(prealloc); return -EINVAL; + } mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) { From 953902e4fb4c373c81a977f78e40f9f93a79e20f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 24 Oct 2025 12:30:56 +0100 Subject: [PATCH 0592/1075] btrfs: set inode flag BTRFS_INODE_COPY_EVERYTHING when logging new name If we are logging a new name make sure our inode has the runtime flag BTRFS_INODE_COPY_EVERYTHING set so that at btrfs_log_inode() we will find new inode refs/extrefs in the subvolume tree and copy them into the log tree. We are currently doing it when adding a new link but we are missing it when renaming. An example where this makes a new name not persisted: 1) create symlink with name foo in directory A 2) fsync directory A, which persists the symlink 3) rename the symlink from foo to bar 4) fsync directory A to persist the new symlink name Step 4 isn't working correctly as it's not logging the new name and also leaving the old inode ref in the log tree, so after a power failure the symlink still has the old name of "foo". This is because when we first fsync directoy A we log the symlink's inode (as it's a new entry) and at btrfs_log_inode() we set the log mode to LOG_INODE_ALL and then because we are using that mode and the inode has the runtime flag BTRFS_INODE_NEEDS_FULL_SYNC set, we clear that flag as well as the flag BTRFS_INODE_COPY_EVERYTHING. That means the next time we log the inode, during the rename through the call to btrfs_log_new_name() (calling btrfs_log_inode_parent() and then btrfs_log_inode()), we will not search the subvolume tree for new refs/extrefs and jump directory to the 'log_extents' label. Fix this by making sure we set BTRFS_INODE_COPY_EVERYTHING on an inode when we are about to log a new name. A test case for fstests will follow soon. Reported-by: Vyacheslav Kovalevsky Link: https://lore.kernel.org/linux-btrfs/ac949c74-90c2-4b9a-b7fd-1ffc5c3175c7@gmail.com/ Reviewed-by: Boris Burkov Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 1 - fs/btrfs/tree-log.c | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02cb081697fe..b95175116ea3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6873,7 +6873,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, BTRFS_I(inode)->dir_index = 0ULL; inode_inc_iversion(inode); inode_set_ctime_current(inode); - set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode), &fname.disk_name, 1, index); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6aad6b65522b..00a59fb79167 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7910,6 +7910,9 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, bool log_pinned = false; int ret; + /* The inode has a new name (ref/extref), so make sure we log it. */ + set_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + btrfs_init_log_ctx(&ctx, inode); ctx.logging_new_name = true; From 3b1a4a59a2086badab391687a6a0b86e03048393 Mon Sep 17 00:00:00 2001 From: austinchang Date: Wed, 29 Oct 2025 09:35:27 +0000 Subject: [PATCH 0593/1075] btrfs: mark dirty extent range for out of bound prealloc extents In btrfs_fallocate(), when the allocated range overlaps with a prealloc extent and the extent starts after i_size, the range doesn't get marked dirty in file_extent_tree. This results in persisting an incorrect disk_i_size for the inode when not using the no-holes feature. This is reproducible since commit 41a2ee75aab0 ("btrfs: introduce per-inode file extent tree"), then became hidden since commit 3d7db6e8bd22 ("btrfs: don't allocate file extent tree for non regular files") and then visible again after commit 8679d2687c35 ("btrfs: initialize inode::file_extent_tree after i_mode has been set"), which fixes the previous commit. The following reproducer triggers the problem: $ cat test.sh MNT=/mnt/test DEV=/dev/vdb mkdir -p $MNT mkfs.btrfs -f -O ^no-holes $DEV mount $DEV $MNT touch $MNT/file1 fallocate -n -o 1M -l 2M $MNT/file1 umount $MNT mount $DEV $MNT len=$((1 * 1024 * 1024)) fallocate -o 1M -l $len $MNT/file1 du --bytes $MNT/file1 umount $MNT mount $DEV $MNT du --bytes $MNT/file1 umount $MNT Running the reproducer gives the following result: $ ./test.sh (...) 2097152 /mnt/test/file1 1048576 /mnt/test/file1 The difference is exactly 1048576 as we assigned. Fix by adding a call to btrfs_inode_set_file_extent_range() in btrfs_fallocate_update_isize(). Fixes: 41a2ee75aab0 ("btrfs: introduce per-inode file extent tree") Signed-off-by: austinchang Reviewed-by: Filipe Manana Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7efd1f8a1912..fa82def46e39 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2854,12 +2854,22 @@ static int btrfs_fallocate_update_isize(struct inode *inode, { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + u64 range_start; + u64 range_end; int ret; int ret2; if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode)) return 0; + range_start = round_down(i_size_read(inode), root->fs_info->sectorsize); + range_end = round_up(end, root->fs_info->sectorsize); + + ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), range_start, + range_end - range_start); + if (ret) + return ret; + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) return PTR_ERR(trans); From a9fb41b5def8e1e0103d5fd1453787993587281e Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 24 Oct 2025 09:35:53 +0200 Subject: [PATCH 0594/1075] drm/ast: Clear preserved bits from register output value Preserve the I/O register bits in __ast_write8_i_masked() as specified by preserve_mask. Accidentally OR-ing the output value into these will overwrite the register's previous settings. Fixes display output on the AST2300, where the screen can go blank at boot. The driver's original commit 312fec1405dd ("drm: Initial KMS driver for AST (ASpeed Technologies) 2000 series (v2)") already added the broken code. Commit 6f719373b943 ("drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off") triggered the bug. Signed-off-by: Thomas Zimmermann Reported-by: Peter Schneider Closes: https://lore.kernel.org/dri-devel/a40caf8e-58ad-4f9c-af7f-54f6f69c29bb@googlemail.com/ Tested-by: Peter Schneider Reviewed-by: Jocelyn Falempe Fixes: 6f719373b943 ("drm/ast: Blank with VGACR17 sync enable, always clear VGACRB6 sync off") Fixes: 312fec1405dd ("drm: Initial KMS driver for AST (ASpeed Technologies) 2000 series (v2)") Cc: Thomas Zimmermann Cc: Nick Bowler Cc: Douglas Anderson Cc: Dave Airlie Cc: Jocelyn Falempe Cc: dri-devel@lists.freedesktop.org Cc: # v3.5+ Link: https://patch.msgid.link/20251024073626.129032-1-tzimmermann@suse.de --- drivers/gpu/drm/ast/ast_drv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index c15aef014f69..d41bd876167c 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -282,13 +282,13 @@ static inline void __ast_write8_i(void __iomem *addr, u32 reg, u8 index, u8 val) __ast_write8(addr, reg + 1, val); } -static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 read_mask, +static inline void __ast_write8_i_masked(void __iomem *addr, u32 reg, u8 index, u8 preserve_mask, u8 val) { - u8 tmp = __ast_read8_i_masked(addr, reg, index, read_mask); + u8 tmp = __ast_read8_i_masked(addr, reg, index, preserve_mask); - tmp |= val; - __ast_write8_i(addr, reg, index, tmp); + val &= ~preserve_mask; + __ast_write8_i(addr, reg, index, tmp | val); } static inline u32 ast_read32(struct ast_device *ast, u32 reg) From 83bac569c762651ac6dff9a86f54ecc13d911f7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Oct 2025 17:17:02 +0200 Subject: [PATCH 0595/1075] xfs: prevent gc from picking the same zone twice When we are picking a zone for gc it might already be in the pipeline which can lead to us moving the same data twice resulting in in write amplification and a very unfortunate case where we keep on garbage collecting the zone we just filled with migrated data stopping all forward progress. Fix this by introducing a count of on-going GC operations on a zone, and skip any zone with ongoing GC when picking a new victim. Fixes: 080d01c41 ("xfs: implement zoned garbage collection") Signed-off-by: Hans Holmberg Co-developed-by: Hans Holmberg Signed-off-by: Christoph Hellwig Reviewed-by: Damien Le Moal Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Tested-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/libxfs/xfs_rtgroup.h | 6 ++++++ fs/xfs/xfs_zone_gc.c | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h index d36a6ae0abe5..d4fcf591e63d 100644 --- a/fs/xfs/libxfs/xfs_rtgroup.h +++ b/fs/xfs/libxfs/xfs_rtgroup.h @@ -50,6 +50,12 @@ struct xfs_rtgroup { uint8_t *rtg_rsum_cache; struct xfs_open_zone *rtg_open_zone; }; + + /* + * Count of outstanding GC operations for zoned XFS. Any RTG with a + * non-zero rtg_gccount will not be picked as new GC victim. + */ + atomic_t rtg_gccount; }; /* diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 109877d9a6bf..4ade54445532 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -114,6 +114,8 @@ struct xfs_gc_bio { /* Open Zone being written to */ struct xfs_open_zone *oz; + struct xfs_rtgroup *victim_rtg; + /* Bio used for reads and writes, including the bvec used by it */ struct bio_vec bv; struct bio bio; /* must be last */ @@ -264,6 +266,7 @@ xfs_zone_gc_iter_init( iter->rec_count = 0; iter->rec_idx = 0; iter->victim_rtg = victim_rtg; + atomic_inc(&victim_rtg->rtg_gccount); } /* @@ -362,6 +365,7 @@ xfs_zone_gc_query( return 0; done: + atomic_dec(&iter->victim_rtg->rtg_gccount); xfs_rtgroup_rele(iter->victim_rtg); iter->victim_rtg = NULL; return 0; @@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from( if (!rtg) continue; + /* + * If the zone is already undergoing GC, don't pick it again. + * + * This prevents us from picking one of the zones for which we + * already submitted GC I/O, but for which the remapping hasn't + * concluded yet. This won't cause data corruption, but + * increases write amplification and slows down GC, so this is + * a bad thing. + */ + if (atomic_read(&rtg->rtg_gccount)) { + xfs_rtgroup_rele(rtg); + continue; + } + /* skip zones that are just waiting for a reset */ if (rtg_rmap(rtg)->i_used_blocks == 0 || rtg_rmap(rtg)->i_used_blocks >= victim_used) { @@ -688,6 +706,9 @@ xfs_zone_gc_start_chunk( chunk->scratch = &data->scratch[data->scratch_idx]; chunk->data = data; chunk->oz = oz; + chunk->victim_rtg = iter->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock); bio->bi_end_io = xfs_zone_gc_end_io; @@ -710,6 +731,8 @@ static void xfs_zone_gc_free_chunk( struct xfs_gc_bio *chunk) { + atomic_dec(&chunk->victim_rtg->rtg_gccount); + xfs_rtgroup_rele(chunk->victim_rtg); list_del(&chunk->entry); xfs_open_zone_put(chunk->oz); xfs_irele(chunk->ip); @@ -770,6 +793,10 @@ xfs_zone_gc_split_write( split_chunk->oz = chunk->oz; atomic_inc(&chunk->oz->oz_ref); + split_chunk->victim_rtg = chunk->victim_rtg; + atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref); + atomic_inc(&chunk->victim_rtg->rtg_gccount); + chunk->offset += split_len; chunk->len -= split_len; chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len); From 0db22d7ee462c42c1284e98d47840932792c1adb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 23 Oct 2025 17:17:03 +0200 Subject: [PATCH 0596/1075] xfs: document another racy GC case in xfs_zoned_map_extent Besides blocks being invalidated, there is another case when the original mapping could have changed between querying the rmap for GC and calling xfs_zoned_map_extent. Document it there as it took us quite some time to figure out what is going on while developing the multiple-GC protection fix. Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Damien Le Moal Reviewed-by: Carlos Maiolino Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 23cdab4515bb..040402240807 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -246,6 +246,14 @@ xfs_zoned_map_extent( * If a data write raced with this GC write, keep the existing data in * the data fork, mark our newly written GC extent as reclaimable, then * move on to the next extent. + * + * Note that this can also happen when racing with operations that do + * not actually invalidate the data, but just move it to a different + * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the + * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the + * data was just moved around, GC fails to free the zone, but the zone + * becomes a GC candidate again as soon as all previous GC I/O has + * finished and these blocks will be moved out eventually. */ if (old_startblock != NULLFSBLOCK && old_startblock != data.br_startblock) From b1c9390f0a44566f7cbd2db979e2ca5393767d49 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 31 Oct 2025 10:37:40 +0000 Subject: [PATCH 0597/1075] Revert "rtc: tps6586x: Fix initial enable_irq/disable_irq balance" Commit 1502fe0e97be ("rtc: tps6586x: Fix initial enable_irq/disable_irq balance") breaks the wake-up alarm for the tps6586x. After this commit was added RTC wake ups from suspend stopped working on the Tegra20 Ventana platform. The problem is that this change set the 'irq_en' variable to true prior to calling devm_request_threaded_irq() to indicate that the IRQ is enabled, however, it was over looked that the flag IRQ_NOAUTOEN is already set meaning that the IRQ is not enabled by default. This prevents the IRQ from being enabled as expected. Revert this change to fix this. Fixes: 1502fe0e97be ("rtc: tps6586x: Fix initial enable_irq/disable_irq balance") Signed-off-by: Jon Hunter Link: https://patch.msgid.link/20251031103741.945460-1-jonathanh@nvidia.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-tps6586x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c index 76ecf7b798f0..54c8429b16bf 100644 --- a/drivers/rtc/rtc-tps6586x.c +++ b/drivers/rtc/rtc-tps6586x.c @@ -258,7 +258,6 @@ static int tps6586x_rtc_probe(struct platform_device *pdev) irq_set_status_flags(rtc->irq, IRQ_NOAUTOEN); - rtc->irq_en = true; ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, tps6586x_rtc_irq, IRQF_ONESHOT, From 0d510778c2f4913b5ca062b8a538929bff94e0be Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 31 Oct 2025 10:37:41 +0000 Subject: [PATCH 0598/1075] Revert "rtc: cpcap: Fix initial enable_irq/disable_irq balance" Commit e0762fd26ad6 ("rtc: cpcap: Fix initial enable_irq/disable_irq balance") set 'alarm_enabled' prior to calling the function devm_request_threaded_irq() because this enables the IRQ. However, right after calling devm_request_threaded_irq(), the driver calls disable_irq() to disable the IRQ and so now 'alarm_enabled' will be true but the IRQ is actually disabled. Revert this commit to fix the 'alarm_enabled' state. Fixes: e0762fd26ad6 ("rtc: cpcap: Fix initial enable_irq/disable_irq balance") Signed-off-by: Jon Hunter Link: https://patch.msgid.link/20251031103741.945460-2-jonathanh@nvidia.com Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-cpcap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/rtc/rtc-cpcap.c b/drivers/rtc/rtc-cpcap.c index 8b6b35716f53..c170345ac076 100644 --- a/drivers/rtc/rtc-cpcap.c +++ b/drivers/rtc/rtc-cpcap.c @@ -268,7 +268,6 @@ static int cpcap_rtc_probe(struct platform_device *pdev) return err; rtc->alarm_irq = platform_get_irq(pdev, 0); - rtc->alarm_enabled = true; err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL, cpcap_rtc_alarm_irq, IRQF_TRIGGER_NONE | IRQF_ONESHOT, From 0d92a3eaa6726e64a18db74ece806c2c021aaac3 Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Fri, 31 Oct 2025 10:48:26 +0100 Subject: [PATCH 0599/1075] null_blk: set dma alignment to logical block size This driver assumes that bio vectors are memory aligned to the logical block size, so set the queue limit to reflect that. Unless we set up the limit based on the logical block size, we will go out of page bounds in copy_to_nullb / copy_from_nullb. Apparently this wasn't noticed so far because none of the tests generate such buffers, but since commit 851c4c96db00 ("xfs: implement XFS_IOC_DIOINFO in terms of vfs_getattr") xfstests generates unaligned I/O, which now lead to memory corruption when using null_blk devices with 4k block size. Fixes: bf8d08532bc1 ("iomap: add support for dma aligned direct-io") Fixes: b1a000d3b8ec ("block: relax direct io memory alignment") Reviewed-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Hans Holmberg Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/block/null_blk/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index f982027e8c85..0ee55f889cfd 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -1949,6 +1949,7 @@ static int null_add_dev(struct nullb_device *dev) .logical_block_size = dev->blocksize, .physical_block_size = dev->blocksize, .max_hw_sectors = dev->max_sectors, + .dma_alignment = dev->blocksize - 1, }; struct nullb *nullb; From 5c5f1f64681cc889d9b13e4a61285e9e029d6ab5 Mon Sep 17 00:00:00 2001 From: Raphael Pinsonneault-Thibeault Date: Fri, 24 Oct 2025 12:29:10 -0400 Subject: [PATCH 0600/1075] Bluetooth: hci_event: validate skb length for unknown CC opcode In hci_cmd_complete_evt(), if the command complete event has an unknown opcode, we assume the first byte of the remaining skb->data contains the return status. However, parameter data has previously been pulled in hci_event_func(), which may leave the skb empty. If so, using skb->data[0] for the return status uses un-init memory. The fix is to check skb->len before using skb->data. Reported-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a9a4bedfca6aa9d7fa24 Tested-by: syzbot+a9a4bedfca6aa9d7fa24@syzkaller.appspotmail.com Fixes: afcb3369f46ed ("Bluetooth: hci_event: Fix vendor (unknown) opcode status handling") Signed-off-by: Raphael Pinsonneault-Thibeault Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_event.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d37db364acf7..f20c826509b6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -4218,6 +4218,13 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, void *data, } if (i == ARRAY_SIZE(hci_cc_table)) { + if (!skb->len) { + bt_dev_err(hdev, "Unexpected cc 0x%4.4x with no status", + *opcode); + *status = HCI_ERROR_UNSPECIFIED; + return; + } + /* Unknown opcode, assume byte 0 contains the status, so * that e.g. __hci_cmd_sync() properly returns errors * for vendor specific commands send by HCI drivers. From 1c21cf89a66413eb04b2d22c955b7a50edc14dfa Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 23:26:30 +0530 Subject: [PATCH 0601/1075] Bluetooth: btrtl: Fix memory leak in rtlbt_parse_firmware_v2() The memory allocated for ptr using kvmalloc() is not freed on the last error path. Fix that by freeing it on that error path. Fixes: 9a24ce5e29b1 ("Bluetooth: btrtl: Firmware format v2 support") Signed-off-by: Abdun Nihaal Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 6abd962502e3..1d4a7887abcc 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -625,8 +625,10 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, len += entry->len; } - if (!len) + if (!len) { + kvfree(ptr); return -EPERM; + } *_buf = ptr; return len; From 8d59fba49362c65332395789fd82771f1028d87e Mon Sep 17 00:00:00 2001 From: Ilia Gavrilov Date: Mon, 20 Oct 2025 15:12:55 +0000 Subject: [PATCH 0602/1075] Bluetooth: MGMT: Fix OOB access in parse_adv_monitor_pattern() In the parse_adv_monitor_pattern() function, the value of the 'length' variable is currently limited to HCI_MAX_EXT_AD_LENGTH(251). The size of the 'value' array in the mgmt_adv_pattern structure is 31. If the value of 'pattern[i].length' is set in the user space and exceeds 31, the 'patterns[i].value' array can be accessed out of bound when copied. Increasing the size of the 'value' array in the 'mgmt_adv_pattern' structure will break the userspace. Considering this, and to avoid OOB access revert the limits for 'offset' and 'length' back to the value of HCI_MAX_AD_LENGTH. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: db08722fc7d4 ("Bluetooth: hci_core: Fix missing instances using HCI_MAX_AD_LENGTH") Cc: stable@vger.kernel.org Signed-off-by: Ilia Gavrilov Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index bca0333f1e99..f5be96f08b9d 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -780,7 +780,7 @@ struct mgmt_adv_pattern { __u8 ad_type; __u8 offset; __u8 length; - __u8 value[31]; + __u8 value[HCI_MAX_AD_LENGTH]; } __packed; #define MGMT_OP_ADD_ADV_PATTERNS_MONITOR 0x0052 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 24e335e3a727..79762bfaea5f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5395,9 +5395,9 @@ static u8 parse_adv_monitor_pattern(struct adv_monitor *m, u8 pattern_count, for (i = 0; i < pattern_count; i++) { offset = patterns[i].offset; length = patterns[i].length; - if (offset >= HCI_MAX_EXT_AD_LENGTH || - length > HCI_MAX_EXT_AD_LENGTH || - (offset + length) > HCI_MAX_EXT_AD_LENGTH) + if (offset >= HCI_MAX_AD_LENGTH || + length > HCI_MAX_AD_LENGTH || + (offset + length) > HCI_MAX_AD_LENGTH) return MGMT_STATUS_INVALID_PARAMS; p = kmalloc(sizeof(*p), GFP_KERNEL); From 14a7f2392f42bbb71c1a5ea68930006221fcd80a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 29 Oct 2025 11:36:46 -0700 Subject: [PATCH 0603/1075] bpf: Make migrate_disable always inline to avoid partial inlining The build fails with llvm 21/22: $ make LLVM=1 -j ... LD vmlinux.o GEN .vmlinux.objs ... BTF .tmp_vmlinux1.btf.o ... AS .tmp_vmlinux2.kallsyms.o LD vmlinux.unstripped BTFIDS vmlinux.unstripped WARN: resolve_btfids: unresolved symbol migrate_enable WARN: resolve_btfids: unresolved symbol migrate_disable make[2]: *** [vmlinux.unstripped] Error 255 make[2]: *** Deleting file 'vmlinux.unstripped' make[1]: *** [Makefile:1242: vmlinux] Error 2 make: *** [Makefile:248: __sub-make] Error 2 Two functions with identical names but different addresses are considered ambiguous and removed by "pahole" from vmlinux BTF. Later resolve_btfids warns since it cannot find them. Commit 378b7708194f ("sched: Make migrate_{en,dis}able() inline") made them inlineable in most places, but in vmlinux built with llvm 21 and 22 there are four symbols for migrate_{enable,disable}: three static functions and one global function. Fix the issue by marking migrate_{enable,disable} as always inline. The alternative is to mark them as notrace/nokprobe which is more drastic. Only bpf programs are prevented from attaching to these functions. The rest of the tracing shouldn't be affected. [note: Peter ok-ed the patch, Alexei rewrote commit log] Fixes: 378b7708194f ("sched: Make migrate_{en,dis}able() inline") Signed-off-by: Yonghong Song Acked-by: Menglong Dong Link: https://lore.kernel.org/r/20251029183646.3811774-1-yonghong.song@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index cbb7340c5866..b469878de25c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2407,12 +2407,12 @@ static inline void __migrate_enable(void) { } * be defined in kernel/sched/core.c. */ #ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE -static inline void migrate_disable(void) +static __always_inline void migrate_disable(void) { __migrate_disable(); } -static inline void migrate_enable(void) +static __always_inline void migrate_enable(void) { __migrate_enable(); } From be708ed300e1ebd32978b4092b909f0d9be0958f Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Thu, 30 Oct 2025 12:17:14 +0000 Subject: [PATCH 0604/1075] bpf/arm64: Fix BPF_ST into arena memory The arm64 JIT supports BPF_ST with BPF_PROBE_MEM32 (arena) by using the tmp2 register to hold the dst + arena_vm_base value and using tmp2 as the new dst register. But this is broken because in case is_lsi_offset() returns false the tmp2 will be clobbered by emit_a64_mov_i(1, tmp2, off, ctx); and hence the emitted store instruction will be of the form: strb w10, [x11, x11] Fix this by using the third temporary register to hold the dst + arena_vm_base. Fixes: 339af577ec05 ("bpf: Add arm64 JIT support for PROBE_MEM32 pseudo instructions.") Signed-off-by: Puranjay Mohan Link: https://lore.kernel.org/r/20251030121715.55214-1-puranjay@kernel.org Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ab83089c3d8f..0c9a50a1e73e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1213,6 +1213,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 src = bpf2a64[insn->src_reg]; const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; + const u8 tmp3 = bpf2a64[TMP_REG_3]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; const u8 priv_sp = bpf2a64[PRIVATE_SP]; @@ -1757,8 +1758,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_ST | BPF_PROBE_MEM32 | BPF_W: case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { - emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); - dst = tmp2; + emit(A64_ADD(1, tmp3, dst, arena_vm_base), ctx); + dst = tmp3; } if (dst == fp) { dst_adj = ctx->priv_sp_used ? priv_sp : A64_SP; From 057f1652feb9be78587dae53866371e7fcce81e9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 24 Oct 2025 14:41:40 -0500 Subject: [PATCH 0605/1075] Revert "PCI: qcom: Remove custom ASPM enablement code" This reverts commit a729c16646198872e345bf6c48dbe540ad8a9753. Prior to a729c1664619 ("PCI: qcom: Remove custom ASPM enablement code"), the qcom controller driver enabled ASPM, including L0s, L1, and L1 PM Substates, for all devices powered on at the time the controller driver enumerates them. ASPM was *not* enabled for devices powered on later by pwrctrl (unless the kernel was built with PCIEASPM_POWERSAVE or PCIEASPM_POWER_SUPERSAVE, or the user enabled ASPM via module parameter or sysfs). After f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms"), the PCI core enabled all ASPM states for all devices whether powered on initially or by pwrctrl, so a729c1664619 was unnecessary and reverted. But f3ac2ff14834 was too aggressive and broke platforms that didn't support CLKREQ# or required device-specific configuration for L1 Substates, so df5192d9bb0e ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms") enabled only L0s and L1. On Qualcomm platforms, this left L1 Substates disabled, which was a regression. Revert a729c1664619 so L1 Substates will be enabled on devices that are initially powered on. Devices powered on by pwrctrl will be addressed later. Fixes: df5192d9bb0e ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms") Reported-by: Johan Hovold Closes: https://lore.kernel.org/lkml/aPuXZlaawFmmsLmX@hovoldconsulting.com/ Signed-off-by: Bjorn Helgaas Tested-by: Johan Hovold Reviewed-by: Manivannan Sadhasivam Link: https://patch.msgid.link/20251024210514.1365996-1-helgaas@kernel.org --- drivers/pci/controller/dwc/pcie-qcom.c | 32 ++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 805edbbfe7eb..e6d2a6b0c087 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -262,6 +262,7 @@ struct qcom_pcie_ops { int (*get_resources)(struct qcom_pcie *pcie); int (*init)(struct qcom_pcie *pcie); int (*post_init)(struct qcom_pcie *pcie); + void (*host_post_init)(struct qcom_pcie *pcie); void (*deinit)(struct qcom_pcie *pcie); void (*ltssm_enable)(struct qcom_pcie *pcie); int (*config_sid)(struct qcom_pcie *pcie); @@ -1094,6 +1095,25 @@ static int qcom_pcie_post_init_2_7_0(struct qcom_pcie *pcie) return 0; } +static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata) +{ + /* + * Downstream devices need to be in D0 state before enabling PCI PM + * substates. + */ + pci_set_power_state_locked(pdev, PCI_D0); + pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL); + + return 0; +} + +static void qcom_pcie_host_post_init_2_7_0(struct qcom_pcie *pcie) +{ + struct dw_pcie_rp *pp = &pcie->pci->pp; + + pci_walk_bus(pp->bridge->bus, qcom_pcie_enable_aspm, NULL); +} + static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie) { struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0; @@ -1380,9 +1400,19 @@ static void qcom_pcie_host_deinit(struct dw_pcie_rp *pp) pcie->cfg->ops->deinit(pcie); } +static void qcom_pcie_host_post_init(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct qcom_pcie *pcie = to_qcom_pcie(pci); + + if (pcie->cfg->ops->host_post_init) + pcie->cfg->ops->host_post_init(pcie); +} + static const struct dw_pcie_host_ops qcom_pcie_dw_ops = { .init = qcom_pcie_host_init, .deinit = qcom_pcie_host_deinit, + .post_init = qcom_pcie_host_post_init, }; /* Qcom IP rev.: 2.1.0 Synopsys IP rev.: 4.01a */ @@ -1444,6 +1474,7 @@ static const struct qcom_pcie_ops ops_1_9_0 = { .get_resources = qcom_pcie_get_resources_2_7_0, .init = qcom_pcie_init_2_7_0, .post_init = qcom_pcie_post_init_2_7_0, + .host_post_init = qcom_pcie_host_post_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, .config_sid = qcom_pcie_config_sid_1_9_0, @@ -1454,6 +1485,7 @@ static const struct qcom_pcie_ops ops_1_21_0 = { .get_resources = qcom_pcie_get_resources_2_7_0, .init = qcom_pcie_init_2_7_0, .post_init = qcom_pcie_post_init_2_7_0, + .host_post_init = qcom_pcie_host_post_init_2_7_0, .deinit = qcom_pcie_deinit_2_7_0, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, }; From 437aa64c8e32b724fc6d60100ef0eb313d32c88f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Mon, 27 Oct 2025 15:24:23 +0200 Subject: [PATCH 0606/1075] PCI: Do not size non-existing prefetchable window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pbus_size_mem() should only be called for bridge windows that exist but __pci_bus_size_bridges() may point 'pref' to a resource that does not exist (has zero flags) in case of non-root buses. When prefetchable bridge window does not exist, the same non-prefetchable bridge window is sized more than once which may result in duplicating entries into the realloc_head list. Duplicated entries are shown in this log and trigger a WARN_ON() because realloc_head had residual entries after the resource assignment algorithm: pci 0000:00:03.0: [11ab:6820] type 01 class 0x060400 PCIe Root Port pci 0000:00:03.0: PCI bridge to [bus 00] pci 0000:00:03.0: bridge window [io 0x0000-0x0fff] pci 0000:00:03.0: bridge window [mem 0x00000000-0x000fffff] pci 0000:00:03.0: bridge window [mem 0x00200000-0x003fffff] to [bus 02] add_size 200000 add_align 200000 pci 0000:00:03.0: bridge window [mem 0x00200000-0x003fffff] to [bus 02] add_size 200000 add_align 200000 pci 0000:00:03.0: bridge window [mem 0xe0000000-0xe03fffff]: assigned pci 0000:00:03.0: PCI bridge to [bus 02] pci 0000:00:03.0: bridge window [mem 0xe0000000-0xe03fffff] ------------[ cut here ]------------ WARNING: CPU: 0 PID: 1 at drivers/pci/setup-bus.c:2373 pci_assign_unassigned_root_bus_resources+0x1bc/0x234 Check resource flags of 'pref' and only size the prefetchable window if the resource has the IORESOURCE_PREFETCH flag. Fixes: ae88d0b9c57f ("PCI: Use pbus_select_window_for_type() during mem window sizing") Reported-by: Klaus Kudielka Closes: https://lore.kernel.org/r/51e8cf1c62b8318882257d6b5a9de7fdaaecc343.camel@gmail.com/ Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Tested-by: Klaus Kudielka Link: https://patch.msgid.link/20251027132423.8841-1-ilpo.jarvinen@linux.intel.com --- drivers/pci/setup-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4a8735b275e4..3645f392a9fd 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1604,7 +1604,7 @@ void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head) pbus_size_io(bus, realloc_head ? 0 : additional_io_size, additional_io_size, realloc_head); - if (pref) { + if (pref && (pref->flags & IORESOURCE_PREFETCH)) { pbus_size_mem(bus, IORESOURCE_MEM | IORESOURCE_PREFETCH | (pref->flags & IORESOURCE_MEM_64), From 9b041a4b66b3b62c30251e700b5688324cf66625 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 13 Oct 2025 14:27:36 -0700 Subject: [PATCH 0607/1075] x86/mm: Ensure clear_page() variants always have __kcfi_typeid_ symbols When building with CONFIG_CFI=y and CONFIG_LTO_CLANG_FULL=y, there is a series of errors from the various versions of clear_page() not having __kcfi_typeid_ symbols. $ cat kernel/configs/repro.config CONFIG_CFI=y # CONFIG_LTO_NONE is not set CONFIG_LTO_CLANG_FULL=y $ make -skj"$(nproc)" ARCH=x86_64 LLVM=1 clean defconfig repro.config bzImage ld.lld: error: undefined symbol: __kcfi_typeid_clear_page_rep >>> referenced by ld-temp.o >>> vmlinux.o:(__cfi_clear_page_rep) ld.lld: error: undefined symbol: __kcfi_typeid_clear_page_orig >>> referenced by ld-temp.o >>> vmlinux.o:(__cfi_clear_page_orig) ld.lld: error: undefined symbol: __kcfi_typeid_clear_page_erms >>> referenced by ld-temp.o >>> vmlinux.o:(__cfi_clear_page_erms) With full LTO, it is possible for LLVM to realize that these functions never have their address taken (as they are only used within an alternative, which will make them a direct call) across the whole kernel and either drop or skip generating their kCFI type identification symbols. clear_page_{rep,orig,erms}() are defined in clear_page_64.S with SYM_TYPED_FUNC_START as a result of 2981557cb040 ("x86,kcfi: Fix EXPORT_SYMBOL vs kCFI"), as exported functions are free to be called indirectly thus need kCFI type identifiers. Use KCFI_REFERENCE with these clear_page() functions to force LLVM to see these functions as address-taken and generate then keep the kCFI type identifiers. Fixes: 2981557cb040 ("x86,kcfi: Fix EXPORT_SYMBOL vs kCFI") Closes: https://github.com/ClangBuiltLinux/linux/issues/2128 Signed-off-by: Nathan Chancellor Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Sami Tolvanen Link: https://patch.msgid.link/20251013-x86-fix-clear_page-cfi-full-lto-errors-v1-1-d69534c0be61@kernel.org --- arch/x86/include/asm/page_64.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 015d23f3e01f..53f4089333f2 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -43,6 +43,9 @@ extern unsigned long __phys_addr_symbol(unsigned long); void clear_page_orig(void *page); void clear_page_rep(void *page); void clear_page_erms(void *page); +KCFI_REFERENCE(clear_page_orig); +KCFI_REFERENCE(clear_page_rep); +KCFI_REFERENCE(clear_page_erms); static inline void clear_page(void *page) { From 6ab753b5d8e521616cd9bd10b09891cbeb7e0235 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:39 -0700 Subject: [PATCH 0608/1075] gve: Implement gettimex64 with -EOPNOTSUPP gve implemented a ptp_clock for sole use of do_aux_work at this time. ptp_clock_gettime() and ptp_sys_offset() assume every ptp_clock has implemented either gettimex64 or gettime64. Stub gettimex64 and return -EOPNOTSUPP to prevent NULL dereferencing. Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+c8c0e7ccabd456541612@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c8c0e7ccabd456541612 Signed-off-by: Tim Hostetler Reviewed-by: Harshitha Ramamurthy Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-2-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index e96247c9d68d..19ae699d4b18 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -26,6 +26,13 @@ int gve_clock_nic_ts_read(struct gve_priv *priv) return 0; } +static int gve_ptp_gettimex64(struct ptp_clock_info *info, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -47,6 +54,7 @@ static long gve_ptp_do_aux_work(struct ptp_clock_info *info) static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", + .gettimex64 = gve_ptp_gettimex64, .do_aux_work = gve_ptp_do_aux_work, }; From 329d050bbe63c2999f657cf2d3855be11a473745 Mon Sep 17 00:00:00 2001 From: Tim Hostetler Date: Wed, 29 Oct 2025 11:45:40 -0700 Subject: [PATCH 0609/1075] gve: Implement settime64 with -EOPNOTSUPP ptp_clock_settime() assumes every ptp_clock has implemented settime64(). Stub it with -EOPNOTSUPP to prevent a NULL dereference. Fixes: acd16380523b ("gve: Add initial PTP device support") Reported-by: syzbot+a546141ca6d53b90aba3@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=a546141ca6d53b90aba3 Signed-off-by: Tim Hostetler Reviewed-by: Kuniyuki Iwashima Signed-off-by: Joshua Washington Link: https://patch.msgid.link/20251029184555.3852952-3-joshwash@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_ptp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index 19ae699d4b18..a384a9ed4914 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -33,6 +33,12 @@ static int gve_ptp_gettimex64(struct ptp_clock_info *info, return -EOPNOTSUPP; } +static int gve_ptp_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + return -EOPNOTSUPP; +} + static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); @@ -55,6 +61,7 @@ static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", .gettimex64 = gve_ptp_gettimex64, + .settime64 = gve_ptp_settime64, .do_aux_work = gve_ptp_do_aux_work, }; From 5a89b27afd3d010680f9355f7ff5b048cfe89333 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Wed, 29 Oct 2025 10:38:13 +0200 Subject: [PATCH 0610/1075] ptp: Allow exposing cycles only for clocks with free-running counter The PTP core falls back to gettimex64 and getcrosststamp when getcycles64 or getcyclesx64 are not implemented. This causes the CYCLES ioctls to retrieve PHC real time instead of free-running cycles. Reject PTP_SYS_OFFSET_{PRECISE,EXTENDED}_CYCLES for clocks without free-running counter support since the result would represent PHC real time and system time rather than cycles and system time. Fixes: faf23f54d366 ("ptp: Add ioctl commands to expose raw cycle counter values") Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Reviewed-by: Tariq Toukan Reviewed-by: Pavan Chebbi Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20251029083813.2276997-1-cjubran@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_chardev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 8106eb617c8c..c61cf9edac48 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -561,10 +561,14 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, return ptp_mask_en_single(pccontext->private_clkdata, argptr); case PTP_SYS_OFFSET_PRECISE_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_precise(ptp, argptr, ptp->info->getcrosscycles); case PTP_SYS_OFFSET_EXTENDED_CYCLES: + if (!ptp->has_cycles) + return -EOPNOTSUPP; return ptp_sys_offset_extended(ptp, argptr, ptp->info->getcyclesx64); default: From 3d18a84eddde169d6dbf3c72cc5358b988c347d0 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 27 Oct 2025 20:46:21 +0100 Subject: [PATCH 0611/1075] net: dsa: tag_brcm: legacy: fix untagged rx on unbridged ports for bcm63xx The internal switch on BCM63XX SoCs will unconditionally add 802.1Q VLAN tags on egress to CPU when 802.1Q mode is enabled. We do this unconditionally since commit ed409f3bbaa5 ("net: dsa: b53: Configure VLANs while not filtering"). This is fine for VLAN aware bridges, but for standalone ports and vlan unaware bridges this means all packets are tagged with the default VID, which is 0. While the kernel will treat that like untagged, this can break userspace applications processing raw packets, expecting untagged traffic, like STP daemons. This also breaks several bridge tests, where the tcpdump output then does not match the expected output anymore. Since 0 isn't a valid VID, just strip out the VLAN tag if we encounter it, unless the priority field is set, since that would be a valid tag again. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Signed-off-by: Jonas Gorski Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20251027194621.133301-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 26bb657ceac3..d9c77fa553b5 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -224,12 +224,14 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, { int len = BRCM_LEG_TAG_LEN; int source_port; + __be16 *proto; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_LEG_TAG_LEN + VLAN_HLEN))) return NULL; brcm_tag = dsa_etype_header_pos_rx(skb); + proto = (__be16 *)(brcm_tag + BRCM_LEG_TAG_LEN); source_port = brcm_tag[5] & BRCM_LEG_PORT_ID; @@ -237,8 +239,12 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - /* VLAN tag is added by BCM63xx internal switch */ - if (netdev_uses_dsa(skb->dev)) + /* The internal switch in BCM63XX SoCs always tags on egress on the CPU + * port. We use VID 0 internally for untagged traffic, so strip the tag + * if the TCI field is all 0, and keep it otherwise to also retain + * e.g. 802.1p tagged packets. + */ + if (proto[0] == htons(ETH_P_8021Q) && proto[1] == 0) len += VLAN_HLEN; /* Remove Broadcom tag and update checksum */ From 02d064de05b1fcca769391fa82d205bed8bb9bf0 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:28:18 +0000 Subject: [PATCH 0612/1075] selftests/net: fix out-of-order delivery of FIN in gro:tcp test Due to the gro_sender sending data packets and FIN packets in very quick succession, these are received almost simultaneously by the gro_receiver. FIN packets are sometimes processed before the data packets leading to intermittent (~1/100) test failures. This change adds a delay of 100ms before sending FIN packets in gro:tcp test to avoid the out-of-order delivery. The same mitigation already exists for the gro:ip test. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030062818.1562228-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 2b1d9f2b3e9e..3fa63bd85dea 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -1032,15 +1033,22 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "tcp") == 0) { send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip") == 0) { send_changed_ECN(txfd, &daddr); From f8e8486702abb05b8c734093aab1606af0eac068 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:04:36 +0000 Subject: [PATCH 0613/1075] selftests/net: use destination options instead of hop-by-hop The GRO self-test, gro.c, currently constructs IPv6 packets containing a Hop-by-Hop Options header (IPPROTO_HOPOPTS) to ensure the GRO path correctly handles IPv6 extension headers. However, network elements may be configured to drop packets with the Hop-by-Hop Options header (HBH). This causes the self-test to fail in environments where such network elements are present. To improve the robustness and reliability of this test in diverse network environments, switch from using IPPROTO_HOPOPTS to IPPROTO_DSTOPTS (Destination Options). The Destination Options header is less likely to be dropped by intermediate routers and still serves the core purpose of the test: validating GRO's handling of an IPv6 extension header. This change ensures the test can execute successfully without being incorrectly failed by network policies outside the kernel's control. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030060436.1556664-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 3fa63bd85dea..cfc39f70635d 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } From 3f978e3f1570155a1327ffa25f60968bc7b9398f Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 30 Oct 2025 09:55:22 +0530 Subject: [PATCH 0614/1075] isdn: mISDN: hfcsusb: fix memory leak in hfcsusb_probe() In hfcsusb_probe(), the memory allocated for ctrl_urb gets leaked when setup_instance() fails with an error code. Fix that by freeing the urb before freeing the hw structure. Also change the error paths to use the goto ladder style. Compile tested only. Issue found using a prototype static analysis tool. Fixes: 69f52adb2d53 ("mISDN: Add HFC USB driver") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251030042524.194812-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/isdn/hardware/mISDN/hfcsusb.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index e54419a4e731..541a20cb58f1 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1904,13 +1904,13 @@ setup_instance(struct hfcsusb *hw, struct device *parent) mISDN_freebchannel(&hw->bch[1]); mISDN_freebchannel(&hw->bch[0]); mISDN_freedchannel(&hw->dch); - kfree(hw); return err; } static int hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + int err; struct hfcsusb *hw; struct usb_device *dev = interface_to_usbdev(intf); struct usb_host_interface *iface = intf->cur_altsetting; @@ -2101,20 +2101,28 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id) if (!hw->ctrl_urb) { pr_warn("%s: No memory for control urb\n", driver_info->vend_name); - kfree(hw); - return -ENOMEM; + err = -ENOMEM; + goto err_free_hw; } pr_info("%s: %s: detected \"%s\" (%s, if=%d alt=%d)\n", hw->name, __func__, driver_info->vend_name, conf_str[small_match], ifnum, alt_used); - if (setup_instance(hw, dev->dev.parent)) - return -EIO; + if (setup_instance(hw, dev->dev.parent)) { + err = -EIO; + goto err_free_urb; + } hw->intf = intf; usb_set_intfdata(hw->intf, hw); return 0; + +err_free_urb: + usb_free_urb(hw->ctrl_urb); +err_free_hw: + kfree(hw); + return err; } /* function called when an active device is removed */ From d01f8136d46b925798abcf86b35a4021e4cfb8bb Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 30 Oct 2025 12:03:40 +0800 Subject: [PATCH 0615/1075] selftests: netdevsim: Fix ethtool-coalesce.sh fail by installing ethtool-common.sh The script "ethtool-common.sh" is not installed in INSTALL_PATH, and triggers some errors when I try to run the test 'drivers/net/netdevsim/ethtool-coalesce.sh': TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # ./ethtool-coalesce.sh: line 4: ethtool-common.sh: No such file or directory # ./ethtool-coalesce.sh: line 25: make_netdev: command not found # ethtool: bad command line argument(s) # ./ethtool-coalesce.sh: line 124: check: command not found # ./ethtool-coalesce.sh: line 126: [: -eq: unary operator expected # FAILED /0 checks not ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh # exit=1 Install this file to avoid this error. After this patch: TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # PASSED all 22 checks ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh Fixes: fbb8531e58bd ("selftests: extract common functions in ethtool-common.sh") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20251030040340.3258110-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c827..df10c7243511 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -20,4 +20,8 @@ TEST_PROGS := \ udp_tunnel_nic.sh \ # end of TEST_PROGS +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + include ../../../lib.mk From c211f5d7cbd5cb34489d526648bb9c8ecc907dee Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 30 Oct 2025 07:35:39 +0000 Subject: [PATCH 0616/1075] net: vlan: sync VLAN features with lower device After registering a VLAN device and setting its feature flags, we need to synchronize the VLAN features with the lower device. For example, the VLAN device does not have the NETIF_F_LRO flag, it should be synchronized with the lower device based on the NETIF_F_UPPER_DISABLES definition. As the dev->vlan_features has changed, we need to call netdev_update_features(). The caller must run after netdev_upper_dev_link() links the lower devices, so this patch adds the netdev_update_features() call in register_vlan_dev(). Fixes: fd867d51f889 ("net/core: generic support for disabling netdev features down stack") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251030073539.133779-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- net/8021q/vlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index fda3a80e9340..2b74ed56eb16 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -193,6 +193,8 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; + netdev_update_features(dev); + return 0; out_unregister_netdev: From d7d2fcf7ae31471b4e08b7e448b8fd0ec2e06a1b Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Wed, 29 Oct 2025 13:50:24 -0700 Subject: [PATCH 0617/1075] netconsole: Acquire su_mutex before navigating configs hierarchy There is a race between operations that iterate over the userdata cg_children list and concurrent add/remove of userdata items through configfs. The update_userdata() function iterates over the nt->userdata_group.cg_children list, and count_extradata_entries() also iterates over this same list to count nodes. Quoting from Documentation/filesystems/configfs.rst: > A subsystem can navigate the cg_children list and the ci_parent pointer > to see the tree created by the subsystem. This can race with configfs' > management of the hierarchy, so configfs uses the subsystem mutex to > protect modifications. Whenever a subsystem wants to navigate the > hierarchy, it must do so under the protection of the subsystem > mutex. Without proper locking, if a userdata item is added or removed concurrently while these functions are iterating, the list can be accessed in an inconsistent state. For example, the list_for_each() loop can reach a node that is being removed from the list by list_del_init() which sets the nodes' .next pointer to point to itself, so the loop will never end (or reach the WARN_ON_ONCE in update_userdata() ). Fix this by holding the configfs subsystem mutex (su_mutex) during all operations that iterate over cg_children. This includes: - userdatum_value_store() which calls update_userdata() to iterate over cg_children - All sysdata_*_enabled_store() functions which call count_extradata_entries() to iterate over cg_children The su_mutex must be acquired before dynamic_netconsole_mutex to avoid potential lock ordering issues, as configfs operations may already hold su_mutex when calling into our code. Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251029-netconsole-fix-warn-v1-1-0d0dd4622f48@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 5d8d0214786c..bb6e03a92956 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -936,6 +936,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, if (count > MAX_EXTRADATA_VALUE_LEN) return -EMSGSIZE; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); ret = strscpy(udm->value, buf, sizeof(udm->value)); @@ -949,6 +950,7 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf, ret = count; out_unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -974,6 +976,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_MSGID); if (msgid_enabled == curr) @@ -994,6 +997,7 @@ static ssize_t sysdata_msgid_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1008,6 +1012,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); if (release_enabled == curr) @@ -1028,6 +1033,7 @@ static ssize_t sysdata_release_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1042,6 +1048,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_TASKNAME); if (taskname_enabled == curr) @@ -1062,6 +1069,7 @@ static ssize_t sysdata_taskname_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } @@ -1077,6 +1085,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, if (ret) return ret; + mutex_lock(&netconsole_subsys.su_mutex); mutex_lock(&dynamic_netconsole_mutex); curr = !!(nt->sysdata_fields & SYSDATA_CPU_NR); if (cpu_nr_enabled == curr) @@ -1105,6 +1114,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, ret = strnlen(buf, count); unlock: mutex_unlock(&dynamic_netconsole_mutex); + mutex_unlock(&netconsole_subsys.su_mutex); return ret; } From 3927c4a1084c48ef97f11281a0a43ecb2cb4d6f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Hork=C3=BD?= Date: Tue, 14 Oct 2025 17:49:32 +0200 Subject: [PATCH 0618/1075] kconfig/mconf: Initialize the default locale at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix bug where make menuconfig doesn't initialize the default locale, which causes ncurses menu borders to be displayed incorrectly (lqqqqk) in UTF-8 terminals that don't support VT100 ACS by default, such as PuTTY. Signed-off-by: Jakub Horký Link: https://patch.msgid.link/20251014154933.3990990-1-jakub.git@horky.net [nathan: Alphabetize locale.h include] Signed-off-by: Nathan Chancellor --- scripts/kconfig/mconf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c index 84ea9215c0a7..b8b7bba84a65 100644 --- a/scripts/kconfig/mconf.c +++ b/scripts/kconfig/mconf.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -931,6 +932,8 @@ int main(int ac, char **av) signal(SIGINT, sig_handler); + setlocale(LC_ALL, ""); + if (ac > 1 && strcmp(av[1], "-s") == 0) { silent = 1; /* Silence conf_read() until the real callback is set up */ From 43c2931a95e6b295bfe9e3b90dbe0f7596933e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Hork=C3=BD?= Date: Tue, 14 Oct 2025 16:44:06 +0200 Subject: [PATCH 0619/1075] kconfig/nconf: Initialize the default locale at startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix bug where make nconfig doesn't initialize the default locale, which causes ncurses menu borders to be displayed incorrectly (lqqqqk) in UTF-8 terminals that don't support VT100 ACS by default, such as PuTTY. Signed-off-by: Jakub Horký Link: https://patch.msgid.link/20251014144405.3975275-2-jakub.git@horky.net [nathan: Alphabetize locale.h include] Signed-off-by: Nathan Chancellor --- scripts/kconfig/nconf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index ae1fe5f60327..521700ed7152 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -7,6 +7,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include #include #include #include @@ -1478,6 +1479,8 @@ int main(int ac, char **av) int lines, columns; char *mode; + setlocale(LC_ALL, ""); + if (ac > 1 && strcmp(av[1], "-s") == 0) { /* Silence conf_read() until the real callback is set up */ conf_set_message_callback(NULL); From c44b4b9eeb71f5b0b617abf6fd66d1ef0aab6200 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 29 Oct 2025 12:54:08 -0700 Subject: [PATCH 0620/1075] objtool: Fix skip_alt_group() for non-alternative STAC/CLAC If an insn->alt points to a STAC/CLAC instruction, skip_alt_group() assumes it's part of an alternative ("alt group") as opposed to some other kind of "alt" such as an exception fixup. While that assumption may hold true in the current code base, Linus has an out-of-tree patch which breaks that assumption by replacing the STAC/CLAC alternatives with raw STAC/CLAC instructions. Make skip_alt_group() more robust by making sure it's actually an alt group before continuing. Reported-by: Linus Torvalds Fixes: 2d12c6fb7875 ("objtool: Remove ANNOTATE_IGNORE_ALTERNATIVE from CLAC/STAC") Closes: https://lore.kernel.org/CAHk-=wi6goUT36sR8GE47_P-aVrd5g38=VTRHpktWARbyE-0ow@mail.gmail.com Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Tested-by: Linus Torvalds Link: https://patch.msgid.link/3d22415f7b8e06a64e0873b21f48389290eeaa49.1761767616.git.jpoimboe@kernel.org --- tools/objtool/check.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 620854fdaaf6..9004fbc06769 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3516,8 +3516,11 @@ static bool skip_alt_group(struct instruction *insn) { struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + if (!insn->alt_group) + return false; + /* ANNOTATE_IGNORE_ALTERNATIVE */ - if (insn->alt_group && insn->alt_group->ignore) + if (insn->alt_group->ignore) return true; /* From 9882a40640036d5bbc590426a78981526d4f2345 Mon Sep 17 00:00:00 2001 From: Ariel D'Alessandro Date: Fri, 24 Oct 2025 17:27:56 -0300 Subject: [PATCH 0621/1075] drm/mediatek: Disable AFBC support on Mediatek DRM driver Commit c410fa9b07c3 ("drm/mediatek: Add AFBC support to Mediatek DRM driver") added AFBC support to Mediatek DRM and enabled the 32x8/split/sparse modifier. However, this is currently broken on Mediatek MT8188 (Genio 700 EVK platform); tested using upstream Kernel and Mesa (v25.2.1), AFBC is used by default since Mesa v25.0. Kernel trace reports vblank timeouts constantly, and the render is garbled: ``` [CRTC:62:crtc-0] vblank wait timed out WARNING: CPU: 7 PID: 70 at drivers/gpu/drm/drm_atomic_helper.c:1835 drm_atomic_helper_wait_for_vblanks.part.0+0x24c/0x27c [...] Hardware name: MediaTek Genio-700 EVK (DT) Workqueue: events_unbound commit_work pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : drm_atomic_helper_wait_for_vblanks.part.0+0x24c/0x27c lr : drm_atomic_helper_wait_for_vblanks.part.0+0x24c/0x27c sp : ffff80008337bca0 x29: ffff80008337bcd0 x28: 0000000000000061 x27: 0000000000000000 x26: 0000000000000001 x25: 0000000000000000 x24: ffff0000c9dcc000 x23: 0000000000000001 x22: 0000000000000000 x21: ffff0000c66f2f80 x20: ffff0000c0d7d880 x19: 0000000000000000 x18: 000000000000000a x17: 000000040044ffff x16: 005000f2b5503510 x15: 0000000000000000 x14: 0000000000000000 x13: 74756f2064656d69 x12: 742074696177206b x11: 0000000000000058 x10: 0000000000000018 x9 : ffff800082396a70 x8 : 0000000000057fa8 x7 : 0000000000000cce x6 : ffff8000823eea70 x5 : ffff0001fef5f408 x4 : ffff80017ccee000 x3 : ffff0000c12cb480 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff0000c12cb480 Call trace: drm_atomic_helper_wait_for_vblanks.part.0+0x24c/0x27c (P) drm_atomic_helper_commit_tail_rpm+0x64/0x80 commit_tail+0xa4/0x1a4 commit_work+0x14/0x20 process_one_work+0x150/0x290 worker_thread+0x2d0/0x3ec kthread+0x12c/0x210 ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- ``` Until this gets fixed upstream, disable AFBC support on this platform, as it's currently broken with upstream Mesa. Fixes: c410fa9b07c3 ("drm/mediatek: Add AFBC support to Mediatek DRM driver") Cc: stable@vger.kernel.org Signed-off-by: Ariel D'Alessandro Reviewed-by: Daniel Stone Reviewed-by: CK Hu Reviewed-by: Macpaul Lin Link: https://patchwork.kernel.org/project/dri-devel/patch/20251024202756.811425-1-ariel.dalessandro@collabora.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_plane.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_plane.c b/drivers/gpu/drm/mediatek/mtk_plane.c index 02349bd44001..788b52c1d10c 100644 --- a/drivers/gpu/drm/mediatek/mtk_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_plane.c @@ -21,9 +21,6 @@ static const u64 modifiers[] = { DRM_FORMAT_MOD_LINEAR, - DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPLIT | - AFBC_FORMAT_MOD_SPARSE), DRM_FORMAT_MOD_INVALID, }; @@ -71,26 +68,7 @@ static bool mtk_plane_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) { - if (modifier == DRM_FORMAT_MOD_LINEAR) - return true; - - if (modifier != DRM_FORMAT_MOD_ARM_AFBC( - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | - AFBC_FORMAT_MOD_SPLIT | - AFBC_FORMAT_MOD_SPARSE)) - return false; - - if (format != DRM_FORMAT_XRGB8888 && - format != DRM_FORMAT_ARGB8888 && - format != DRM_FORMAT_BGRX8888 && - format != DRM_FORMAT_BGRA8888 && - format != DRM_FORMAT_ABGR8888 && - format != DRM_FORMAT_XBGR8888 && - format != DRM_FORMAT_RGB888 && - format != DRM_FORMAT_BGR888) - return false; - - return true; + return modifier == DRM_FORMAT_MOD_LINEAR; } static void mtk_plane_destroy_state(struct drm_plane *plane, From afcfb6c8474d9e750880aaa77952cc588f859613 Mon Sep 17 00:00:00 2001 From: Jason-JH Lin Date: Fri, 29 Aug 2025 17:15:59 +0800 Subject: [PATCH 0622/1075] drm/mediatek: Add pm_runtime support for GCE power control Call pm_runtime_resume_and_get() before accessing GCE hardware in mbox_send_message(), and invoke pm_runtime_put_autosuspend() in the cmdq callback to release the PM reference and start autosuspend for GCE. This ensures correct power management for the GCE device. Fixes: 8afe816b0c99 ("mailbox: mtk-cmdq-mailbox: Implement Runtime PM with autosuspend") Signed-off-by: Jason-JH Lin Reviewed-by: CK Hu Link: https://patchwork.kernel.org/project/dri-devel/patch/20250829091727.3745415-3-jason-jh.lin@mediatek.com/ Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_crtc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_crtc.c b/drivers/gpu/drm/mediatek/mtk_crtc.c index bc7527542fdc..c4c6d0249df5 100644 --- a/drivers/gpu/drm/mediatek/mtk_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_crtc.c @@ -283,6 +283,10 @@ static void ddp_cmdq_cb(struct mbox_client *cl, void *mssg) unsigned int i; unsigned long flags; + /* release GCE HW usage and start autosuspend */ + pm_runtime_mark_last_busy(cmdq_cl->chan->mbox->dev); + pm_runtime_put_autosuspend(cmdq_cl->chan->mbox->dev); + if (data->sta < 0) return; @@ -618,6 +622,9 @@ static void mtk_crtc_update_config(struct mtk_crtc *mtk_crtc, bool needs_vblank) mtk_crtc->config_updating = false; spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); + if (pm_runtime_resume_and_get(mtk_crtc->cmdq_client.chan->mbox->dev) < 0) + goto update_config_out; + mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0); goto update_config_out; From 3f673559795b2f556b8a9272c2724b79eee7a976 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2025 09:37:57 -0300 Subject: [PATCH 0623/1075] tools arch x86: Sync msr-index.h to pick AMD64_{PERF_CNTR_GLOBAL_STATUS_SET,SAVIC_CONTROL}, IA32_L3_QOS_{ABMC,EXT}_CFG To pick up the changes in: cdfed9370b96aaba ("KVM: x86/pmu: Move PMU_CAP_{FW_WRITES,LBR_FMT} into msr-index.h header") bc6397cf0bc4f2b7 ("x86/cpu/topology: Define AMD64_CPUID_EXT_FEAT MSR") 84ecefb766748916 ("x86/resctrl: Add data structures and definitions for ABMC assignment") faebbc58cde9d8f6 ("x86/resctrl: Add support to enable/disable AMD ABMC feature") c4074ab87f3483de ("x86/apic: Enable Secure AVIC in the control MSR") 869e36b9660dd72a ("x86/apic: Allow NMI to be injected from hypervisor for Secure AVIC") 30c2b98aa84c76f2 ("x86/apic: Add new driver for Secure AVIC") 0c5caea762de31a8 ("perf/x86: Add PERF_CAP_PEBS_TIMING_INFO flag") 68e61f6fd65610e7 ("KVM: SVM: Emulate PERF_CNTR_GLOBAL_STATUS_SET for PerfMonV2") a3c4f3396b82849a ("x86/msr-index: Add AMD workload classification MSRs") 65f55a30176662ee ("x86/CPU/AMD: Add CPUID faulting support") 17ec2f965344ee3f ("KVM: VMX: Allow guest to set DEBUGCTL.RTM_DEBUG if RTM is supported") Addressing this tools/perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h That makes the beautification scripts to pick some new entries: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after --- before 2025-10-30 09:34:49.283533597 -0300 +++ after 2025-10-30 09:35:00.971426811 -0300 @@ -272,6 +272,9 @@ [0xc0000300 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_STATUS", [0xc0000301 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_CTL", [0xc0000302 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_STATUS_CLR", + [0xc0000303 - x86_64_specific_MSRs_offset] = "AMD64_PERF_CNTR_GLOBAL_STATUS_SET", + [0xc00003fd - x86_64_specific_MSRs_offset] = "IA32_L3_QOS_ABMC_CFG", + [0xc00003ff - x86_64_specific_MSRs_offset] = "IA32_L3_QOS_EXT_CFG", [0xc0000400 - x86_64_specific_MSRs_offset] = "IA32_EVT_CFG_BASE", [0xc0000500 - x86_64_specific_MSRs_offset] = "AMD_WORKLOAD_CLASS_CONFIG", [0xc0000501 - x86_64_specific_MSRs_offset] = "AMD_WORKLOAD_CLASS_ID", @@ -319,6 +322,7 @@ [0xc0010133 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_RMP_END", [0xc0010134 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_GUEST_TSC_FREQ", [0xc0010136 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_RMP_CFG", + [0xc0010138 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_SAVIC_CONTROL", [0xc0010140 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_ID_LENGTH", [0xc0010141 - x86_AMD_V_KVM_MSRs_offset] = "AMD64_OSVW_STATUS", [0xc0010200 - x86_AMD_V_KVM_MSRs_offset] = "F15H_PERF_CTL", $ Now one can trace systemwide asking to see backtraces to where that MSR is being read/written: root@x1:~# perf trace -e msr:*_msr/max-stack=32/ --filter="msr==IA32_L3_QOS_ABMC_CFG" ^Croot@x1:~# If we use -v (verbose mode) we can see what it does behind the scenes: root@x1:~# perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_L3_QOS_ABMC_CFG" 0xc00003fd New filter for msr:write_msr: (msr==0xc00003fd) && (common_pid != 449842 && common_pid != 433756) 0xc00003fd New filter for msr:read_msr: (msr==0xc00003fd) && (common_pid != 449842 && common_pid != 433756) mmap size 528384B ^Croot@x1:~# Example with a frequent msr: # perf trace -v -e msr:*_msr/max-stack=32/ --filter="msr==IA32_SPEC_CTRL" --max-events 2 Using CPUID AuthenticAMD-25-21-0 0x48 New filter for msr:read_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) 0x48 New filter for msr:write_msr: (msr==0x48) && (common_pid != 2612129 && common_pid != 3841) mmap size 528384B Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols 0.000 Timer/2525383 msr:write_msr(msr: IA32_SPEC_CTRL, val: 6) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule ([kernel.kallsyms]) futex_wait_queue_me ([kernel.kallsyms]) futex_wait ([kernel.kallsyms]) do_futex ([kernel.kallsyms]) __x64_sys_futex ([kernel.kallsyms]) do_syscall_64 ([kernel.kallsyms]) entry_SYSCALL_64_after_hwframe ([kernel.kallsyms]) __futex_abstimed_wait_common64 (/usr/lib64/libpthread-2.33.so) 0.030 :0/0 msr:write_msr(msr: IA32_SPEC_CTRL, val: 2) do_trace_write_msr ([kernel.kallsyms]) do_trace_write_msr ([kernel.kallsyms]) __switch_to_xtra ([kernel.kallsyms]) __switch_to ([kernel.kallsyms]) __schedule ([kernel.kallsyms]) schedule_idle ([kernel.kallsyms]) do_idle ([kernel.kallsyms]) cpu_startup_entry ([kernel.kallsyms]) secondary_startup_64_no_verify ([kernel.kallsyms]) # Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Babu Moger Cc: Dapeng Mi Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: K Prateek Nayak Cc: Namhyung Kim Cc: Neeraj Upadhyay Cc: Perry Yuan Cc: Peter Zijlstra Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index f627196eb796..9e1720d73244 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -315,9 +315,12 @@ #define PERF_CAP_PT_IDX 16 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define PERF_CAP_LBR_FMT 0x3f #define PERF_CAP_PEBS_TRAP BIT_ULL(6) #define PERF_CAP_ARCH_REG BIT_ULL(7) #define PERF_CAP_PEBS_FORMAT 0xf00 +#define PERF_CAP_FW_WRITES BIT_ULL(13) #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ @@ -633,6 +636,11 @@ #define MSR_AMD_PPIN 0xc00102f1 #define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 + +#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54 +#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) + #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 #define MSR_AMD64_TW_CFG 0xc0011023 @@ -701,8 +709,15 @@ #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) #define MSR_AMD64_SNP_SMT_PROT_BIT 17 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) -#define MSR_AMD64_SNP_RESV_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 +#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) +#define MSR_AMD64_SNP_RESV_BIT 19 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) +#define MSR_AMD64_SAVIC_CONTROL 0xc0010138 +#define MSR_AMD64_SAVIC_EN_BIT 0 +#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) +#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 +#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 #define MSR_AMD64_RMP_CFG 0xc0010136 @@ -735,6 +750,7 @@ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 #define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 +#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 /* AMD Hardware Feedback Support MSRs */ #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 @@ -1225,6 +1241,8 @@ /* - AMD: */ #define MSR_IA32_MBA_BW_BASE 0xc0000200 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 +#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd +#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff #define MSR_IA32_EVT_CFG_BASE 0xc0000400 /* AMD-V MSRs */ From 76977baa83a747f7b405d61f66775e3f0316f4b5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2025 17:04:47 -0300 Subject: [PATCH 0624/1075] tools headers uapi: Update fs.h with the kernel sources To pick up changes from: db2ab24a341ce893 ("Add RWF_NOSIGNAL flag for pwritev2") These are used to beautify fs syscall arguments, albeit the changes in this update are not affecting those beautifiers. This addresses these tools/ build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h Please see tools/include/uapi/README for details (it's in the first patch of this series). Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Lauri Vasama Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/uapi/linux/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 0bd678a4a10e..beb4c2d1e41c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -430,10 +430,13 @@ typedef int __bitwise __kernel_rwf_t; /* buffered IO that drops the cache after reading or writing data */ #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) +/* prevent pipe and socket writes from raising SIGPIPE */ +#define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ - RWF_DONTCACHE) + RWF_DONTCACHE | RWF_NOSIGNAL) #define PROCFS_IOCTL_MAGIC 'f' From 5be93389f3e5ce552b21f84fb28926f90a0dcd53 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 1 Nov 2025 12:53:56 -0300 Subject: [PATCH 0625/1075] tools headers: Sync uapi/linux/prctl.h with the kernel source To pick up the changes in these csets: 8cdc4d27019356b0 ("mm/huge_memory: respect MADV_COLLAPSE with PR_THP_DISABLE_EXCEPT_ADVISED") 9dc21bbd62edeae6 ("prctl: extend PR_SET_THP_DISABLE to optionally exclude VM_HUGEPAGE") That don't introduce anything of interest for the tools/, just addressing these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. Cc: Andrew Morton Cc: David Hildenbrand Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index ed3aed264aeb..51c4e8c82b1e 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -177,7 +177,17 @@ struct prctl_mm_map { #define PR_GET_TID_ADDRESS 40 +/* + * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0 + * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively + * return "1" when no flags were specified for PR_SET_THP_DISABLE. + */ #define PR_SET_THP_DISABLE 41 +/* + * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / + * VM_HUGEPAGE, MADV_COLLAPSE). + */ +# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42 /* From 5466858a70827d6ceaf4a9d292a317bf420593d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 1 Nov 2025 13:01:13 -0300 Subject: [PATCH 0626/1075] tools headers: Sync uapi/linux/fcntl.h with the kernel sources To pick up the changes in this cset: e83f0b5d10dcf628 ("nsfs: support exhaustive file handles") That doesn't introduce anything of interest for tools/, just addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Please see tools/include/uapi/README for further details. Cc: Christian Brauner Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index f291ab4f94eb..3741ea1b73d8 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -111,6 +111,7 @@ #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ +#define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ /* Generic flags for the *at(2) family of syscalls. */ From e0acec3369ca7071d14196c33e7bef35f61f9cde Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 1 Nov 2025 13:04:20 -0300 Subject: [PATCH 0627/1075] tools headers x86: Sync table due to introducion of uprobe syscall To pick the changes in this cset: 56101b69c9190667 ("uprobes/x86: Add uprobe syscall to speed up uprobe") That add support for this new 'uprobe' syscall in tools such as 'perf trace'. Now it is possible to do a system wide 'perf trace' to look if this new syscall is being used: root@number:~# perf trace -v -e uprobe event qualifier tracepoint filter: (common_pid != 33989) && (id == 336) ^C root@number# $ grep -w uprobe tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 336 common uprobe sys_uprobe $ This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Please see tools/include/uapi/README for further details. Cc: Jiri Olsa Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 92cf0fe2291e..ced2a1deecd7 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,7 @@ 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq 335 common uretprobe sys_uretprobe +336 common uprobe sys_uprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal From ccaba800e78f2ac1e3a407b623653149bf9c0763 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 1 Nov 2025 13:16:47 -0300 Subject: [PATCH 0628/1075] tools headers x86 cpufeatures: Sync with the kernel sources To pick the changes from: e19c06219985f2be ("x86/cpufeatures: Add support for Assignable Bandwidth Monitoring Counters (ABMC)") 7b59c73fd611eae8 ("x86/cpufeatures: Add SNP Secure TSC") 3c7cb84145336721 ("x86/cpufeatures: Add a CPU feature bit for MSR immediate form instructions") 2f8f173413f1cbf5 ("x86/vmscape: Add conditional IBPB mitigation") a508cec6e5215a3f ("x86/vmscape: Enumerate VMSCAPE bug") This causes these perf files to be rebuilt and brings some X86_FEATURE that may be used by: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Please see tools/include/uapi/README for further details. Cc: Babu Moger Cc: Borislav Petkov (AMD) Cc: Dave Hansen Cc: Nikunj A Dadhania Cc: Pawan Gupta Cc: Sean Christopherson Cc: Xin Li Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 06fc0479a23f..4091a776e37a 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -444,6 +444,7 @@ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ #define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ @@ -495,6 +496,9 @@ #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ +#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ +#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ +#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ /* * BUG word(s) @@ -551,4 +555,5 @@ #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ +#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ #endif /* _ASM_X86_CPUFEATURES_H */ From 320258783765316d2baae99c26e461ee634054fe Mon Sep 17 00:00:00 2001 From: Jay Bhat Date: Thu, 30 Oct 2025 21:17:25 -0500 Subject: [PATCH 0629/1075] RDMA/irdma: Fix vf_id size to u16 to avoid overflow Correctly size the vf_id to u16 to avoid overflow. Signed-off-by: Jay Bhat Signed-off-by: Tatyana Nikolova Link: https://patch.msgid.link/20251031021726.1003-6-tatyana.e.nikolova@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 4ae77cdde9dc..c1b8f81ea283 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -706,7 +706,7 @@ struct irdma_sc_dev { u32 vchnl_ver; u16 num_vfs; u16 hmc_fn_id; - u8 vf_id; + u16 vf_id; bool privileged:1; bool vchnl_up:1; bool ceq_valid:1; From b8126205dbe01e22b0d10c8be132bb53bf3399c1 Mon Sep 17 00:00:00 2001 From: Krzysztof Czurylo Date: Thu, 30 Oct 2025 21:17:20 -0500 Subject: [PATCH 0630/1075] MAINTAINERS: Update irdma maintainers Adds Krzysztof Czurylo as co-maintainer for irdma driver. Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova Signed-off-by: Leon Romanovsky --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..8861469749e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12515,6 +12515,7 @@ F: include/linux/avf/virtchnl.h F: include/linux/net/intel/*/ INTEL ETHERNET PROTOCOL DRIVER FOR RDMA +M: Krzysztof Czurylo M: Tatyana Nikolova L: linux-rdma@vger.kernel.org S: Supported From 6146a0f1dfae5d37442a9ddcba012add260bceb0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Nov 2025 11:28:02 -0800 Subject: [PATCH 0631/1075] Linux 6.18-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b34a1f4c0396..088565edc911 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* From ff4d2ef3874773c9c6173b0f099372bf62252aaf Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 29 Oct 2025 08:14:06 +0100 Subject: [PATCH 0632/1075] rust: devres: fix private intra-doc link The future move of pin-init to `syn` uncovers the following private intra-doc link: error: public documentation for `Devres` links to private item `Self::inner` --> rust/kernel/devres.rs:106:7 | 106 | /// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. | ^^^^^^^^^^^ this item is private | = note: this link will resolve properly if you pass `--document-private-items` = note: `-D rustdoc::private-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::private_intra_doc_links)]` Currently, when rendered, the link points to "nowhere" (an inexistent anchor for a "method"). Thus fix it. Cc: stable@vger.kernel.org Fixes: f5d3ef25d238 ("rust: devres: get rid of Devres' inner Arc") Acked-by: Danilo Krummrich Link: https://patch.msgid.link/20251029071406.324511-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/devres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/devres.rs b/rust/kernel/devres.rs index 10a6a1789854..2392c281459e 100644 --- a/rust/kernel/devres.rs +++ b/rust/kernel/devres.rs @@ -103,7 +103,7 @@ struct Inner { /// /// # Invariants /// -/// [`Self::inner`] is guaranteed to be initialized and is always accessed read-only. +/// `Self::inner` is guaranteed to be initialized and is always accessed read-only. #[pin_data(PinnedDrop)] pub struct Devres { dev: ARef, From 09b1704f5b02c18dd02b21343530463fcfc92c54 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 29 Oct 2025 08:33:44 +0100 Subject: [PATCH 0633/1075] rust: condvar: fix broken intra-doc link The future move of pin-init to `syn` uncovers the following broken intra-doc link: error: unresolved link to `crate::pin_init` --> rust/kernel/sync/condvar.rs:39:40 | 39 | /// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros. | ^^^^^^^^^^^^^^^^ no item named `pin_init` in module `kernel` | = note: `-D rustdoc::broken-intra-doc-links` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(rustdoc::broken_intra_doc_links)]` Currently, when rendered, the link points to a literal `crate::pin_init!` URL. Thus fix it. Cc: stable@vger.kernel.org Fixes: 129e97be8e28 ("rust: pin-init: fix documentation links") Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251029073344.349341-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/kernel/sync/condvar.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs index c6ec64295c9f..aa5b9a7a726d 100644 --- a/rust/kernel/sync/condvar.rs +++ b/rust/kernel/sync/condvar.rs @@ -36,7 +36,7 @@ macro_rules! new_condvar { /// spuriously. /// /// Instances of [`CondVar`] need a lock class and to be pinned. The recommended way to create such -/// instances is with the [`pin_init`](crate::pin_init!) and [`new_condvar`] macros. +/// instances is with the [`pin_init`](pin_init::pin_init!) and [`new_condvar`] macros. /// /// # Examples /// From 083d7af3350e04c428256a3bd10003f63151b6b1 Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Mon, 27 Oct 2025 10:52:06 +0800 Subject: [PATCH 0634/1075] MAINTAINERS: erofs: add myself as reviewer In the past two years, I have focused on EROFS and contributed features including the reserved buffer pool, configurable global buffer pool, and the ongoing direct I/O support for compressed data. I would like to continue contributing to EROFS and help with code reviews. Please CC me on EROFS-related changes. Signed-off-by: Chunhai Guo Acked-by: Gao Xiang Acked-by: Chao Yu Acked-by: Hongbo Li Signed-off-by: Gao Xiang --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46bd8e033042..f2665f23ad5e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9207,6 +9207,7 @@ R: Yue Hu R: Jeffle Xu R: Sandeep Dhavale R: Hongbo Li +R: Chunhai Guo L: linux-erofs@lists.ozlabs.org S: Maintained W: https://erofs.docs.kernel.org From eb3182ef0405ff2f6668fd3e5ff9883f60ce8801 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Wed, 15 Oct 2025 13:18:28 +0800 Subject: [PATCH 0635/1075] perf/core: Fix system hang caused by cpu-clock usage cpu-clock usage by the async-profiler tool can trigger a system hang, which got bisected back to the following commit by Octavia Togami: 18dbcbfabfff ("perf: Fix the POLL_HUP delivery breakage") causes this issue The root cause of the hang is that cpu-clock is a special type of SW event which relies on hrtimers. The __perf_event_overflow() callback is invoked from the hrtimer handler for cpu-clock events, and __perf_event_overflow() tries to call cpu_clock_event_stop() to stop the event, which calls htimer_cancel() to cancel the hrtimer. But that's a recursion into the hrtimer code from a hrtimer handler, which (unsurprisingly) deadlocks. To fix this bug, use hrtimer_try_to_cancel() instead, and set the PERF_HES_STOPPED flag, which causes perf_swevent_hrtimer() to stop the event once it sees the PERF_HES_STOPPED flag. [ mingo: Fixed the comments and improved the changelog. ] Closes: https://lore.kernel.org/all/CAHPNGSQpXEopYreir+uDDEbtXTBvBvi8c6fYXJvceqtgTPao3Q@mail.gmail.com/ Fixes: 18dbcbfabfff ("perf: Fix the POLL_HUP delivery breakage") Reported-by: Octavia Togami Suggested-by: Peter Zijlstra Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Tested-by: Octavia Togami Cc: stable@vger.kernel.org Link: https://github.com/lucko/spark/issues/530 Link: https://patch.msgid.link/20251015051828.12809-1-dapeng1.mi@linux.intel.com --- kernel/events/core.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 177e57c1a362..1fd347da9026 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11773,7 +11773,8 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event = container_of(hrtimer, struct perf_event, hw.hrtimer); - if (event->state != PERF_EVENT_STATE_ACTIVE) + if (event->state != PERF_EVENT_STATE_ACTIVE || + event->hw.state & PERF_HES_STOPPED) return HRTIMER_NORESTART; event->pmu->read(event); @@ -11819,15 +11820,20 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; /* - * The throttle can be triggered in the hrtimer handler. - * The HRTIMER_NORESTART should be used to stop the timer, - * rather than hrtimer_cancel(). See perf_swevent_hrtimer() + * Careful: this function can be triggered in the hrtimer handler, + * for cpu-clock events, so hrtimer_cancel() would cause a + * deadlock. + * + * So use hrtimer_try_to_cancel() to try to stop the hrtimer, + * and the cpu-clock handler also sets the PERF_HES_STOPPED flag, + * which guarantees that perf_swevent_hrtimer() will stop the + * hrtimer once it sees the PERF_HES_STOPPED flag. */ if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); local64_set(&hwc->period_left, ktime_to_ns(remaining)); - hrtimer_cancel(&hwc->hrtimer); + hrtimer_try_to_cancel(&hwc->hrtimer); } } @@ -11871,12 +11877,14 @@ static void cpu_clock_event_update(struct perf_event *event) static void cpu_clock_event_start(struct perf_event *event, int flags) { + event->hw.state = 0; local64_set(&event->hw.prev_count, local_clock()); perf_swevent_start_hrtimer(event); } static void cpu_clock_event_stop(struct perf_event *event, int flags) { + event->hw.state = PERF_HES_STOPPED; perf_swevent_cancel_hrtimer(event); if (flags & PERF_EF_UPDATE) cpu_clock_event_update(event); @@ -11950,12 +11958,14 @@ static void task_clock_event_update(struct perf_event *event, u64 now) static void task_clock_event_start(struct perf_event *event, int flags) { + event->hw.state = 0; local64_set(&event->hw.prev_count, event->ctx->time); perf_swevent_start_hrtimer(event); } static void task_clock_event_stop(struct perf_event *event, int flags) { + event->hw.state = PERF_HES_STOPPED; perf_swevent_cancel_hrtimer(event); if (flags & PERF_EF_UPDATE) task_clock_event_update(event, event->ctx->time); From a1d3bc606bf5c3b3ea811cc2019df6285d75b00f Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Mon, 3 Nov 2025 04:01:48 +0300 Subject: [PATCH 0636/1075] mtd: spinand: fmsh: remove QE bit for FM25S01A flash According to datasheet (http://eng.fmsh.com/nvm/FM25S01A_ds_eng.pdf) there is no QE (Quad Enable) bit for FM25S01A flash, so remove it. Fixes: 5f284dc15ca86 ("mtd: spinand: add support for FudanMicro FM25S01A") Signed-off-by: Mikhail Kshevetskiy Tested-by: Tianling Shen Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/fmsh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/spi/fmsh.c b/drivers/mtd/nand/spi/fmsh.c index 8b2097bfc771..c2b9a8c113cb 100644 --- a/drivers/mtd/nand/spi/fmsh.c +++ b/drivers/mtd/nand/spi/fmsh.c @@ -58,7 +58,7 @@ static const struct spinand_info fmsh_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - SPINAND_HAS_QE_BIT, + 0, SPINAND_ECCINFO(&fm25s01a_ooblayout, NULL)), }; From 97315e7c901a1de60e8ca9b11e0e96d0f9253e18 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Nov 2025 16:25:48 +0300 Subject: [PATCH 0637/1075] mtd: onenand: Pass correct pointer to IRQ handler This was supposed to pass "onenand" instead of "&onenand" with the ampersand. Passing a random stack address which will be gone when the function ends makes no sense. However the good thing is that the pointer is never used, so this doesn't cause a problem at run time. Fixes: e23abf4b7743 ("mtd: OneNAND: S5PC110: Implement DMA interrupt method") Signed-off-by: Dan Carpenter Signed-off-by: Miquel Raynal --- drivers/mtd/nand/onenand/onenand_samsung.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/onenand/onenand_samsung.c b/drivers/mtd/nand/onenand/onenand_samsung.c index f37a6138e461..6d6aa709a21f 100644 --- a/drivers/mtd/nand/onenand/onenand_samsung.c +++ b/drivers/mtd/nand/onenand/onenand_samsung.c @@ -906,7 +906,7 @@ static int s3c_onenand_probe(struct platform_device *pdev) err = devm_request_irq(&pdev->dev, r->start, s5pc110_onenand_irq, IRQF_SHARED, "onenand", - &onenand); + onenand); if (err) { dev_err(&pdev->dev, "failed to get irq\n"); return err; From 4a372798e12cc777918db90c06e11c80e90bd001 Mon Sep 17 00:00:00 2001 From: Jihed Chaibi Date: Thu, 21 Aug 2025 21:47:10 +0200 Subject: [PATCH 0638/1075] dt-bindings: gpio: ti,twl4030: Correct the schema $id path The $id for a binding should match its file path. The ti,twl4030-gpio binding is located in the gpio/ subdirectory but was missing this from its $id. Correct the path to follow the standard convention. Fixes: 842dcff8e2d6 ("dt-bindings: gpio: Convert ti,twl4030-gpio to DT schema") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Jihed Chaibi Fixes: 842dcff8e2d6 ("dt-bindings: gpio: Convert ti,twl4030-gpio to DT schema") Link: https://lore.kernel.org/r/20250821194710.16043-1-jihed.chaibi.dev@gmail.com Signed-off-by: Bartosz Golaszewski --- Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml b/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml index 5e3e199fd9a4..96d50d14c071 100644 --- a/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml +++ b/Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml @@ -1,7 +1,7 @@ # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) %YAML 1.2 --- -$id: http://devicetree.org/schemas/ti,twl4030-gpio.yaml# +$id: http://devicetree.org/schemas/gpio/ti,twl4030-gpio.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# title: TI TWL4030 GPIO controller From 0a4b61d9c2e496b5f0a10e29e355a1465c8738bb Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 28 Oct 2025 21:35:42 +0000 Subject: [PATCH 0639/1075] x86/amd_node: Fix AMD root device caching Recent AMD node rework removed the "search and count" method of caching AMD root devices. This depended on the value from a Data Fabric register that was expected to hold the PCI bus of one of the root devices attached to that fabric. However, this expectation is incorrect. The register, when read from PCI config space, returns the bitwise-OR of the buses of all attached root devices. This behavior is benign on AMD reference design boards, since the bus numbers are aligned. This results in a bitwise-OR value matching one of the buses. For example, 0x00 | 0x40 | 0xA0 | 0xE0 = 0xE0. This behavior breaks on boards where the bus numbers are not exactly aligned. For example, 0x00 | 0x07 | 0xE0 | 0x15 = 0x1F. The examples above are for AMD node 0. The first root device on other nodes will not be 0x00. The first root device for other nodes will depend on the total number of root devices, the system topology, and the specific PCI bus number assignment. For example, a system with 2 AMD nodes could have this: Node 0 : 0x00 0x07 0x0e 0x15 Node 1 : 0x1c 0x23 0x2a 0x31 The bus numbering style in the reference boards is not a requirement. The numbering found in other boards is not incorrect. Therefore, the root device caching method needs to be adjusted. Go back to the "search and count" method used before the recent rework. Search for root devices using PCI class code rather than fixed PCI IDs. This keeps the goal of the rework (remove dependency on PCI IDs) while being able to support various board designs. Merge helper functions to reduce code duplication. [ bp: Reflow comment. ] Fixes: 40a5f6ffdfc8 ("x86/amd_nb: Simplify root device search") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/all/20251028-fix-amd-root-v2-1-843e38f8be2c@amd.com --- arch/x86/include/asm/amd/node.h | 1 - arch/x86/kernel/amd_node.c | 150 +++++++++++--------------------- 2 files changed, 51 insertions(+), 100 deletions(-) diff --git a/arch/x86/include/asm/amd/node.h b/arch/x86/include/asm/amd/node.h index 23fe617898a8..a672b8765fa8 100644 --- a/arch/x86/include/asm/amd/node.h +++ b/arch/x86/include/asm/amd/node.h @@ -23,7 +23,6 @@ #define AMD_NODE0_PCI_SLOT 0x18 struct pci_dev *amd_node_get_func(u16 node, u8 func); -struct pci_dev *amd_node_get_root(u16 node); static inline u16 amd_num_nodes(void) { diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index a40176b62eb5..3d0a4768d603 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -34,62 +34,6 @@ struct pci_dev *amd_node_get_func(u16 node, u8 func) return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); } -#define DF_BLK_INST_CNT 0x040 -#define DF_CFG_ADDR_CNTL_LEGACY 0x084 -#define DF_CFG_ADDR_CNTL_DF4 0xC04 - -#define DF_MAJOR_REVISION GENMASK(27, 24) - -static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) -{ - u32 reg; - - /* - * Revision fields added for DF4 and later. - * - * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. - */ - if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, ®)) - return 0; - - if (reg & DF_MAJOR_REVISION) - return DF_CFG_ADDR_CNTL_DF4; - - return DF_CFG_ADDR_CNTL_LEGACY; -} - -struct pci_dev *amd_node_get_root(u16 node) -{ - struct pci_dev *root; - u16 cntl_off; - u8 bus; - - if (!cpu_feature_enabled(X86_FEATURE_ZEN)) - return NULL; - - /* - * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) - * Bits [7:0] (SecBusNum) holds the bus number of the root device for - * this Data Fabric instance. The segment, device, and function will be 0. - */ - struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); - if (!df_f0) - return NULL; - - cntl_off = get_cfg_addr_cntl_offset(df_f0); - if (!cntl_off) - return NULL; - - if (pci_read_config_byte(df_f0, cntl_off, &bus)) - return NULL; - - /* Grab the pointer for the actual root device instance. */ - root = pci_get_domain_bus_and_slot(0, bus, 0); - - pci_dbg(root, "is root for AMD node %u\n", node); - return root; -} - static struct pci_dev **amd_roots; /* Protect the PCI config register pairs used for SMN. */ @@ -274,51 +218,21 @@ DEFINE_SHOW_STORE_ATTRIBUTE(smn_node); DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); -static int amd_cache_roots(void) +static struct pci_dev *get_next_root(struct pci_dev *root) { - u16 node, num_nodes = amd_num_nodes(); - - amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); - if (!amd_roots) - return -ENOMEM; - - for (node = 0; node < num_nodes; node++) - amd_roots[node] = amd_node_get_root(node); - - return 0; -} - -static int reserve_root_config_spaces(void) -{ - struct pci_dev *root = NULL; - struct pci_bus *bus = NULL; - - while ((bus = pci_find_next_bus(bus))) { - /* Root device is Device 0 Function 0 on each Primary Bus. */ - root = pci_get_slot(bus, 0); - if (!root) + while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) { + /* Root device is Device 0 Function 0. */ + if (root->devfn) continue; if (root->vendor != PCI_VENDOR_ID_AMD && root->vendor != PCI_VENDOR_ID_HYGON) continue; - pci_dbg(root, "Reserving PCI config space\n"); - - /* - * There are a few SMN index/data pairs and other registers - * that shouldn't be accessed by user space. - * So reserve the entire PCI config space for simplicity rather - * than covering specific registers piecemeal. - */ - if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { - pci_err(root, "Failed to reserve config space\n"); - return -EEXIST; - } + break; } - smn_exclusive = true; - return 0; + return root; } static bool enable_dfs; @@ -332,7 +246,8 @@ __setup("amd_smn_debugfs_enable", amd_smn_enable_dfs); static int __init amd_smn_init(void) { - int err; + u16 count, num_roots, roots_per_node, node, num_nodes; + struct pci_dev *root; if (!cpu_feature_enabled(X86_FEATURE_ZEN)) return 0; @@ -342,13 +257,48 @@ static int __init amd_smn_init(void) if (amd_roots) return 0; - err = amd_cache_roots(); - if (err) - return err; + num_roots = 0; + root = NULL; + while ((root = get_next_root(root))) { + pci_dbg(root, "Reserving PCI config space\n"); - err = reserve_root_config_spaces(); - if (err) - return err; + /* + * There are a few SMN index/data pairs and other registers + * that shouldn't be accessed by user space. So reserve the + * entire PCI config space for simplicity rather than covering + * specific registers piecemeal. + */ + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { + pci_err(root, "Failed to reserve config space\n"); + return -EEXIST; + } + + num_roots++; + } + + pr_debug("Found %d AMD root devices\n", num_roots); + + if (!num_roots) + return -ENODEV; + + num_nodes = amd_num_nodes(); + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); + if (!amd_roots) + return -ENOMEM; + + roots_per_node = num_roots / num_nodes; + + count = 0; + node = 0; + root = NULL; + while (node < num_nodes && (root = get_next_root(root))) { + /* Use one root for each node and skip the rest. */ + if (count++ % roots_per_node) + continue; + + pci_dbg(root, "is root for AMD node %u\n", node); + amd_roots[node++] = root; + } if (enable_dfs) { debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); @@ -358,6 +308,8 @@ static int __init amd_smn_init(void) debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); } + smn_exclusive = true; + return 0; } From 8da0efc3da9312b65f5cbf06e57d284f69222b2e Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Mon, 3 Nov 2025 11:58:09 +0000 Subject: [PATCH 0640/1075] ASoC: doc: cs35l56: Update firmware filename description for B0 silicon Update the text for firmware file naming to show that the l?u? suffix is supported on CS35L56 B0 silicon and ampN was only used on early firmware. The previous version of this text only said that B0 silicon used the ampN suffix. Since kernel 6.16 the driver supports both the old ampN and new l?u? suffix for B0 silicon. New firmwares will use the l?u? suffix. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20251103115809.33953-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- Documentation/sound/codecs/cs35l56.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Documentation/sound/codecs/cs35l56.rst b/Documentation/sound/codecs/cs35l56.rst index 57d1964453e1..d5363b08f515 100644 --- a/Documentation/sound/codecs/cs35l56.rst +++ b/Documentation/sound/codecs/cs35l56.rst @@ -105,10 +105,10 @@ In this example the SSID is 10280c63. The format of the firmware file names is: -SoundWire (except CS35L56 Rev B0): +SoundWire: cs35lxx-b0-dsp1-misc-SSID[-spkidX]-l?u? -SoundWire CS35L56 Rev B0: +SoundWire CS35L56 Rev B0 firmware released before kernel version 6.16: cs35lxx-b0-dsp1-misc-SSID[-spkidX]-ampN Non-SoundWire (HDA and I2S): @@ -127,9 +127,8 @@ Where: * spkidX is an optional part, used for laptops that have firmware configurations for different makes and models of internal speakers. -The CS35L56 Rev B0 continues to use the old filename scheme because a -large number of firmware files have already been published with these -names. +Early firmware for CS35L56 Rev B0 used the ALSA prefix (ampN) as the +filename qualifier. Support for the l?u? qualifier was added in kernel 6.16. Sound Open Firmware and ALSA topology files ------------------------------------------- From b4cd8f94ae7e5b0d83041b186258eba0c8f7c5f7 Mon Sep 17 00:00:00 2001 From: Matt Coster Date: Tue, 14 Oct 2025 12:57:31 +0100 Subject: [PATCH 0641/1075] drm/imagination: Optionally depend on POWER_SEQUENCING When the change using pwrseq was added, I nixed the dependency on POWER_SEQUENCING since we didn't want it pulled in on platforms where it's not needed [1]. I hadn't, however, considered the link-time implications of this for configs with POWER_SEQUENCING=m. [1]: https://lore.kernel.org/r/a265a20e-8908-40d8-b4e0-2c8b8f773742@imgtec.com/ Fixes: e38e8391f30b ("drm/imagination: Use pwrseq for TH1520 GPU power management") Cc: stable@vger.kernel.org Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202510111806.CMulNMKW-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202511011739.SONHjSfR-lkp@intel.com/ Reviewed-by: Alessio Belle Link: https://patch.msgid.link/20251014-pwrseq-dep-v1-1-49aabd9d8fa1@imgtec.com Signed-off-by: Matt Coster --- drivers/gpu/drm/imagination/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/imagination/Kconfig b/drivers/gpu/drm/imagination/Kconfig index 682dd2633d0c..0482bfcefdde 100644 --- a/drivers/gpu/drm/imagination/Kconfig +++ b/drivers/gpu/drm/imagination/Kconfig @@ -7,6 +7,7 @@ config DRM_POWERVR depends on DRM depends on MMU depends on PM + depends on POWER_SEQUENCING || !POWER_SEQUENCING select DRM_EXEC select DRM_GEM_SHMEM_HELPER select DRM_SCHED From e5d527be7e6984882306b49c067f1fec18920735 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 3 Nov 2025 10:35:24 +0100 Subject: [PATCH 0642/1075] gpio: swnode: don't use the swnode's name as the key for GPIO lookup Looking up a GPIO controller by label that is the name of the software node is wonky at best - the GPIO controller driver is free to set a different label than the name of its firmware node. We're already being passed a firmware node handle attached to the GPIO device to swnode_get_gpio_device() so use it instead for a more precise lookup. Acked-by: Linus Walleij Fixes: e7f9ff5dc90c ("gpiolib: add support for software nodes") Link: https://lore.kernel.org/r/20251103-reset-gpios-swnodes-v4-4-6461800b6775@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-swnode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c index f21dbc28cf2c..e3806db1c0e0 100644 --- a/drivers/gpio/gpiolib-swnode.c +++ b/drivers/gpio/gpiolib-swnode.c @@ -41,7 +41,7 @@ static struct gpio_device *swnode_get_gpio_device(struct fwnode_handle *fwnode) !strcmp(gdev_node->name, GPIOLIB_SWNODE_UNDEFINED_NAME)) return ERR_PTR(-ENOENT); - gdev = gpio_device_find_by_label(gdev_node->name); + gdev = gpio_device_find_by_fwnode(fwnode); return gdev ?: ERR_PTR(-EPROBE_DEFER); } From 2f6115ad8864cf3f48598f26c74c7c8e5c391919 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 3 Nov 2025 15:11:32 +0100 Subject: [PATCH 0643/1075] gpiolib: fix invalid pointer access in debugfs If the memory allocation in gpiolib_seq_start() fails, the s->private field remains uninitialized and is later dereferenced without checking in gpiolib_seq_stop(). Initialize s->private to NULL before calling kzalloc() and check it before dereferencing it. Fixes: e348544f7994 ("gpio: protect the list of GPIO devices with SRCU") Reviewed-by: Linus Walleij Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20251103141132.53471-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 9952e412da50..cd8800ba5825 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -5296,6 +5296,8 @@ static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) struct gpio_device *gdev; loff_t index = *pos; + s->private = NULL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return NULL; @@ -5329,7 +5331,11 @@ static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) static void gpiolib_seq_stop(struct seq_file *s, void *v) { - struct gpiolib_seq_priv *priv = s->private; + struct gpiolib_seq_priv *priv; + + priv = s->private; + if (!priv) + return; srcu_read_unlock(&gpio_devices_srcu, priv->idx); kfree(priv); From f8950b47db707565543d6c4a9fdb7e36b3329722 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 10:33:26 -0300 Subject: [PATCH 0644/1075] tools headers UAPI: Update tools's copy of drm.h to pick DRM_IOCTL_GEM_CHANGE_HANDLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Picking the changes from: 0864197382fa7c8c ("drm: Move drm_gem ioctl kerneldoc to uapi file") 53096728b8910c69 ("drm: Add DRM prime interface to reassign GEM handle") Addressing these perf build warnings: Warning: Kernel ABI header differences: Now 'perf trace' and other code that might use the tools/perf/trace/beauty autogenerated tables will be able to translate this new ioctl command into a string: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after --- before 2025-11-03 09:57:34.832553174 -0300 +++ after 2025-11-03 09:57:47.969409428 -0300 @@ -111,6 +111,7 @@ [0xCF] = "SYNCOBJ_EVENTFD", [0xD0] = "MODE_CLOSEFB", [0xD1] = "SET_CLIENT_NAME", + [0xD2] = "GEM_CHANGE_HANDLE", [DRM_COMMAND_BASE + 0x00] = "I915_INIT", [DRM_COMMAND_BASE + 0x01] = "I915_FLUSH", [DRM_COMMAND_BASE + 0x02] = "I915_FLIP", $ Please see tools/include/uapi/README for further details. Cc: Christian König Cc: David Francis Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 63 +++++++++++++++++++++++++++++------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index e63a71d3c607..3cd5cf15e3c9 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -597,34 +597,65 @@ struct drm_set_version { int drm_dd_minor; }; -/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/** + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. + * @handle: Handle of the object to be closed. + * @pad: Padding. + * + * Releases the handle to an mm object. + */ struct drm_gem_close { - /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/* DRM_IOCTL_GEM_FLINK ioctl argument type */ +/** + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. + * @handle: Handle for the object being named. + * @name: Returned global name. + * + * Create a global name for an object, returning the name. + * + * Note that the name does not hold a reference; when the object + * is freed, the name goes away. + */ struct drm_gem_flink { - /** Handle for the object being named */ __u32 handle; - - /** Returned global name */ __u32 name; }; -/* DRM_IOCTL_GEM_OPEN ioctl argument type */ +/** + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. + * @name: Name of object being opened. + * @handle: Returned handle for the object. + * @size: Returned size of the object + * + * Open an object using the global name, returning a handle and the size. + * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ struct drm_gem_open { - /** Name of object being opened */ __u32 name; - - /** Returned handle for the object */ __u32 handle; - - /** Returned size of the object */ __u64 size; }; +/** + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. + * @handle: The handle of a gem object. + * @new_handle: An available gem handle. + * + * This ioctl changes the handle of a GEM object to the specified one. + * The new handle must be unused. On success the old handle is closed + * and all further IOCTL should refer to the new handle only. + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. + */ +struct drm_gem_change_handle { + __u32 handle; + __u32 new_handle; +}; + /** * DRM_CAP_DUMB_BUFFER * @@ -1309,6 +1340,14 @@ extern "C" { */ #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) +/** + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle + * + * Some applications (notably CRIU) need objects to have specific gem handles. + * This ioctl changes the object at one gem handle to use a new gem handle. + */ +#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. From 8f05967b022d255412640670915475ac4cdc10e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Nov 2025 08:49:32 -0700 Subject: [PATCH 0645/1075] MAINTAINERS: correct git location for block layer tree As part of a recent move go exclusively listing git.kernel.org trees for the block and io_uring development, the "BLOCK LAYER" entry wasn't updated as it already used git.kernel.org. However, outside of just moving from git.kernel.dk to git.kernel.org, the "block" part of the trees was also dropped, as the tree serves both block and io_uring development trees. Fix up the "BLOCK LAYER" entry so they all use the same tree. Reported-by: John Garry Reviewed-by: John Garry Signed-off-by: Jens Axboe --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0554bf05b426..b986f4635b7d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4362,7 +4362,7 @@ BLOCK LAYER M: Jens Axboe L: linux-block@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git F: Documentation/ABI/stable/sysfs-block F: Documentation/block/ F: block/ From 819630bd6f86ac8998c7df9deddb6cee50e9e22d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 3 Nov 2025 13:53:13 +0000 Subject: [PATCH 0646/1075] io_uring/zcrx: remove sync refill uapi There is a better way to handle the problem IORING_REGISTER_ZCRX_REFILL solves. The uapi can also be slightly adjusted to accommodate future extensions. Remove the feature for now, it'll be reworked for the next release. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring.h | 12 ------- io_uring/register.c | 3 -- io_uring/zcrx.c | 68 ----------------------------------- io_uring/zcrx.h | 7 ---- 4 files changed, 90 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 263bed13473e..b7c8dad26690 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -689,9 +689,6 @@ enum io_uring_register_op { /* query various aspects of io_uring, see linux/io_uring/query.h */ IORING_REGISTER_QUERY = 35, - /* return zcrx buffers back into circulation */ - IORING_REGISTER_ZCRX_REFILL = 36, - /* this goes last */ IORING_REGISTER_LAST, @@ -1073,15 +1070,6 @@ struct io_uring_zcrx_ifq_reg { __u64 __resv[3]; }; -struct io_uring_zcrx_sync_refill { - __u32 zcrx_id; - /* the number of entries to return */ - __u32 nr_entries; - /* pointer to an array of struct io_uring_zcrx_rqe */ - __u64 rqes; - __u64 __resv[2]; -}; - #ifdef __cplusplus } #endif diff --git a/io_uring/register.c b/io_uring/register.c index 2e4717f1357c..d189b266b8cc 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -827,9 +827,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, case IORING_REGISTER_QUERY: ret = io_query(ctx, arg, nr_args); break; - case IORING_REGISTER_ZCRX_REFILL: - ret = io_zcrx_return_bufs(ctx, arg, nr_args); - break; default: ret = -EINVAL; break; diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index a816f5902091..b1b723222cdb 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -928,74 +928,6 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = { .uninstall = io_pp_uninstall, }; -#define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16) -#define IO_ZCRX_SYS_REFILL_BATCH 32 - -static void io_return_buffers(struct io_zcrx_ifq *ifq, - struct io_uring_zcrx_rqe *rqes, unsigned nr) -{ - int i; - - for (i = 0; i < nr; i++) { - struct net_iov *niov; - netmem_ref netmem; - - if (!io_parse_rqe(&rqes[i], ifq, &niov)) - continue; - - scoped_guard(spinlock_bh, &ifq->rq_lock) { - if (!io_zcrx_put_niov_uref(niov)) - continue; - } - - netmem = net_iov_to_netmem(niov); - if (!page_pool_unref_and_test(netmem)) - continue; - io_zcrx_return_niov(niov); - } -} - -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH]; - struct io_uring_zcrx_rqe __user *user_rqes; - struct io_uring_zcrx_sync_refill zr; - struct io_zcrx_ifq *ifq; - unsigned nr, i; - - if (nr_arg) - return -EINVAL; - if (copy_from_user(&zr, arg, sizeof(zr))) - return -EFAULT; - if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS) - return -EINVAL; - if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv))) - return -EINVAL; - - ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id); - if (!ifq) - return -EINVAL; - nr = zr.nr_entries; - user_rqes = u64_to_user_ptr(zr.rqes); - - for (i = 0; i < nr;) { - unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH); - size_t size = batch * sizeof(rqes[0]); - - if (copy_from_user(rqes, user_rqes + i, size)) - return i ? i : -EFAULT; - io_return_buffers(ifq, rqes, batch); - - i += batch; - - if (fatal_signal_pending(current)) - return i; - cond_resched(); - } - return nr; -} - static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, struct io_zcrx_ifq *ifq, int off, int len) { diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 33ef61503092..a48871b5adad 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -63,8 +63,6 @@ struct io_zcrx_ifq { }; #if defined(CONFIG_IO_URING_ZCRX) -int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg); int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg); void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); @@ -97,11 +95,6 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct { return NULL; } -static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx, - void __user *arg, unsigned nr_arg) -{ - return -EOPNOTSUPP; -} #endif int io_recvzc(struct io_kiocb *req, unsigned int issue_flags); From bd9e119951b527724938ff55fb0ca3daa54f5c18 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 27 Oct 2025 20:26:28 +0100 Subject: [PATCH 0647/1075] Documentation: ACPI: i2c-muxes: fix I2C device references When the device references were changed from relative to absolute in commit e65cb011349e ("Documentation: ACPI: Fix parent device references"), the MUX0 device was omitted from the paths. So add it to fix the references. Fixes: e65cb011349e ("Documentation: ACPI: Fix parent device references") Closes: https://lore.kernel.org/all/48d0fb45-096c-4caa-b51c-753c2f17f018@gmail.com/ Signed-off-by: Jonas Gorski Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20251027192628.130998-1-jonas.gorski@gmail.com Signed-off-by: Rafael J. Wysocki --- Documentation/firmware-guide/acpi/i2c-muxes.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/firmware-guide/acpi/i2c-muxes.rst b/Documentation/firmware-guide/acpi/i2c-muxes.rst index f366539acd79..96ef4012d78f 100644 --- a/Documentation/firmware-guide/acpi/i2c-muxes.rst +++ b/Documentation/firmware-guide/acpi/i2c-muxes.rst @@ -37,8 +37,8 @@ which corresponds to the following ASL (in the scope of \_SB):: Name (_HID, ...) Name (_CRS, ResourceTemplate () { I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, - AddressingMode7Bit, "\\_SB.SMB1.CH00", 0x00, - ResourceConsumer,,) + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH00", + 0x00, ResourceConsumer,,) } } } @@ -52,8 +52,8 @@ which corresponds to the following ASL (in the scope of \_SB):: Name (_HID, ...) Name (_CRS, ResourceTemplate () { I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, - AddressingMode7Bit, "\\_SB.SMB1.CH01", 0x00, - ResourceConsumer,,) + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH01", + 0x00, ResourceConsumer,,) } } } From 29e4d12a2957e4e7dcad2418c7251f36285d99d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 10:53:46 -0300 Subject: [PATCH 0648/1075] tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: fe2bf6234e947bf5 ("KVM: guest_memfd: Add INIT_SHARED flag, reject user page faults if not set") d2042d8f96ddefde ("KVM: Rework KVM_CAP_GUEST_MEMFD_MMAP into KVM_CAP_GUEST_MEMFD_FLAGS") 3d3a04fad25a6621 ("KVM: Allow and advertise support for host mmap() on guest_memfd files") That just rebuilds perf, as these patches don't add any new KVM ioctl to be harvested for the 'perf trace' ioctl syscall argument beautifiers. This addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Please see tools/include/uapi/README for further details. Cc: Sean Christopherson Cc: Fuad Tabba Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f0f0d49d2544..52f6000ab020 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -962,6 +962,7 @@ struct kvm_enable_cap { #define KVM_CAP_ARM_EL2_E2H0 241 #define KVM_CAP_RISCV_MP_STATE_RESET 242 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 +#define KVM_CAP_GUEST_MEMFD_FLAGS 244 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1598,6 +1599,8 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) +#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) struct kvm_create_guest_memfd { __u64 size; From 84bbe327a5cbb060f3321c9d9d4d53936fc1ef9b Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Thu, 23 Oct 2025 10:25:19 +0200 Subject: [PATCH 0649/1075] drm/i915: Avoid lock inversion when pinning to GGTT on CHV/BXT+VTD On completion of i915_vma_pin_ww(), a synchronous variant of dma_fence_work_commit() is called. When pinning a VMA to GGTT address space on a Cherry View family processor, or on a Broxton generation SoC with VTD enabled, i.e., when stop_machine() is then called from intel_ggtt_bind_vma(), that can potentially lead to lock inversion among reservation_ww and cpu_hotplug locks. [86.861179] ====================================================== [86.861193] WARNING: possible circular locking dependency detected [86.861209] 6.15.0-rc5-CI_DRM_16515-gca0305cadc2d+ #1 Tainted: G U [86.861226] ------------------------------------------------------ [86.861238] i915_module_loa/1432 is trying to acquire lock: [86.861252] ffffffff83489090 (cpu_hotplug_lock){++++}-{0:0}, at: stop_machine+0x1c/0x50 [86.861290] but task is already holding lock: [86.861303] ffffc90002e0b4c8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_vma_pin.constprop.0+0x39/0x1d0 [i915] [86.862233] which lock already depends on the new lock. [86.862251] the existing dependency chain (in reverse order) is: [86.862265] -> #5 (reservation_ww_class_mutex){+.+.}-{3:3}: [86.862292] dma_resv_lockdep+0x19a/0x390 [86.862315] do_one_initcall+0x60/0x3f0 [86.862334] kernel_init_freeable+0x3cd/0x680 [86.862353] kernel_init+0x1b/0x200 [86.862369] ret_from_fork+0x47/0x70 [86.862383] ret_from_fork_asm+0x1a/0x30 [86.862399] -> #4 (reservation_ww_class_acquire){+.+.}-{0:0}: [86.862425] dma_resv_lockdep+0x178/0x390 [86.862440] do_one_initcall+0x60/0x3f0 [86.862454] kernel_init_freeable+0x3cd/0x680 [86.862470] kernel_init+0x1b/0x200 [86.862482] ret_from_fork+0x47/0x70 [86.862495] ret_from_fork_asm+0x1a/0x30 [86.862509] -> #3 (&mm->mmap_lock){++++}-{3:3}: [86.862531] down_read_killable+0x46/0x1e0 [86.862546] lock_mm_and_find_vma+0xa2/0x280 [86.862561] do_user_addr_fault+0x266/0x8e0 [86.862578] exc_page_fault+0x8a/0x2f0 [86.862593] asm_exc_page_fault+0x27/0x30 [86.862607] filldir64+0xeb/0x180 [86.862620] kernfs_fop_readdir+0x118/0x480 [86.862635] iterate_dir+0xcf/0x2b0 [86.862648] __x64_sys_getdents64+0x84/0x140 [86.862661] x64_sys_call+0x1058/0x2660 [86.862675] do_syscall_64+0x91/0xe90 [86.862689] entry_SYSCALL_64_after_hwframe+0x76/0x7e [86.862703] -> #2 (&root->kernfs_rwsem){++++}-{3:3}: [86.862725] down_write+0x3e/0xf0 [86.862738] kernfs_add_one+0x30/0x3c0 [86.862751] kernfs_create_dir_ns+0x53/0xb0 [86.862765] internal_create_group+0x134/0x4c0 [86.862779] sysfs_create_group+0x13/0x20 [86.862792] topology_add_dev+0x1d/0x30 [86.862806] cpuhp_invoke_callback+0x4b5/0x850 [86.862822] cpuhp_issue_call+0xbf/0x1f0 [86.862836] __cpuhp_setup_state_cpuslocked+0x111/0x320 [86.862852] __cpuhp_setup_state+0xb0/0x220 [86.862866] topology_sysfs_init+0x30/0x50 [86.862879] do_one_initcall+0x60/0x3f0 [86.862893] kernel_init_freeable+0x3cd/0x680 [86.862908] kernel_init+0x1b/0x200 [86.862921] ret_from_fork+0x47/0x70 [86.862934] ret_from_fork_asm+0x1a/0x30 [86.862947] -> #1 (cpuhp_state_mutex){+.+.}-{3:3}: [86.862969] __mutex_lock+0xaa/0xed0 [86.862982] mutex_lock_nested+0x1b/0x30 [86.862995] __cpuhp_setup_state_cpuslocked+0x67/0x320 [86.863012] __cpuhp_setup_state+0xb0/0x220 [86.863026] page_alloc_init_cpuhp+0x2d/0x60 [86.863041] mm_core_init+0x22/0x2d0 [86.863054] start_kernel+0x576/0xbd0 [86.863068] x86_64_start_reservations+0x18/0x30 [86.863084] x86_64_start_kernel+0xbf/0x110 [86.863098] common_startup_64+0x13e/0x141 [86.863114] -> #0 (cpu_hotplug_lock){++++}-{0:0}: [86.863135] __lock_acquire+0x1635/0x2810 [86.863152] lock_acquire+0xc4/0x2f0 [86.863166] cpus_read_lock+0x41/0x100 [86.863180] stop_machine+0x1c/0x50 [86.863194] bxt_vtd_ggtt_insert_entries__BKL+0x3b/0x60 [i915] [86.863987] intel_ggtt_bind_vma+0x43/0x70 [i915] [86.864735] __vma_bind+0x55/0x70 [i915] [86.865510] fence_work+0x26/0xa0 [i915] [86.866248] fence_notify+0xa1/0x140 [i915] [86.866983] __i915_sw_fence_complete+0x8f/0x270 [i915] [86.867719] i915_sw_fence_commit+0x39/0x60 [i915] [86.868453] i915_vma_pin_ww+0x462/0x1360 [i915] [86.869228] i915_vma_pin.constprop.0+0x133/0x1d0 [i915] [86.870001] initial_plane_vma+0x307/0x840 [i915] [86.870774] intel_initial_plane_config+0x33f/0x670 [i915] [86.871546] intel_display_driver_probe_nogem+0x1c6/0x260 [i915] [86.872330] i915_driver_probe+0x7fa/0xe80 [i915] [86.873057] i915_pci_probe+0xe6/0x220 [i915] [86.873782] local_pci_probe+0x47/0xb0 [86.873802] pci_device_probe+0xf3/0x260 [86.873817] really_probe+0xf1/0x3c0 [86.873833] __driver_probe_device+0x8c/0x180 [86.873848] driver_probe_device+0x24/0xd0 [86.873862] __driver_attach+0x10f/0x220 [86.873876] bus_for_each_dev+0x7f/0xe0 [86.873892] driver_attach+0x1e/0x30 [86.873904] bus_add_driver+0x151/0x290 [86.873917] driver_register+0x5e/0x130 [86.873931] __pci_register_driver+0x7d/0x90 [86.873945] i915_pci_register_driver+0x23/0x30 [i915] [86.874678] i915_init+0x37/0x120 [i915] [86.875347] do_one_initcall+0x60/0x3f0 [86.875369] do_init_module+0x97/0x2a0 [86.875385] load_module+0x2c54/0x2d80 [86.875398] init_module_from_file+0x96/0xe0 [86.875413] idempotent_init_module+0x117/0x330 [86.875426] __x64_sys_finit_module+0x77/0x100 [86.875440] x64_sys_call+0x24de/0x2660 [86.875454] do_syscall_64+0x91/0xe90 [86.875470] entry_SYSCALL_64_after_hwframe+0x76/0x7e [86.875486] other info that might help us debug this: [86.875502] Chain exists of: cpu_hotplug_lock --> reservation_ww_class_acquire --> reservation_ww_class_mutex [86.875539] Possible unsafe locking scenario: [86.875552] CPU0 CPU1 [86.875563] ---- ---- [86.875573] lock(reservation_ww_class_mutex); [86.875588] lock(reservation_ww_class_acquire); [86.875606] lock(reservation_ww_class_mutex); [86.875624] rlock(cpu_hotplug_lock); [86.875637] *** DEADLOCK *** [86.875650] 3 locks held by i915_module_loa/1432: [86.875663] #0: ffff888101f5c1b0 (&dev->mutex){....}-{3:3}, at: __driver_attach+0x104/0x220 [86.875699] #1: ffffc90002e0b4a0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_vma_pin.constprop.0+0x39/0x1d0 [i915] [86.876512] #2: ffffc90002e0b4c8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_vma_pin.constprop.0+0x39/0x1d0 [i915] [86.877305] stack backtrace: [86.877326] CPU: 0 UID: 0 PID: 1432 Comm: i915_module_loa Tainted: G U 6.15.0-rc5-CI_DRM_16515-gca0305cadc2d+ #1 PREEMPT(voluntary) [86.877334] Tainted: [U]=USER [86.877336] Hardware name: /NUC5CPYB, BIOS PYBSWCEL.86A.0079.2020.0420.1316 04/20/2020 [86.877339] Call Trace: [86.877344] [86.877353] dump_stack_lvl+0x91/0xf0 [86.877364] dump_stack+0x10/0x20 [86.877369] print_circular_bug+0x285/0x360 [86.877379] check_noncircular+0x135/0x150 [86.877390] __lock_acquire+0x1635/0x2810 [86.877403] lock_acquire+0xc4/0x2f0 [86.877408] ? stop_machine+0x1c/0x50 [86.877422] ? __pfx_bxt_vtd_ggtt_insert_entries__cb+0x10/0x10 [i915] [86.878173] cpus_read_lock+0x41/0x100 [86.878182] ? stop_machine+0x1c/0x50 [86.878191] ? __pfx_bxt_vtd_ggtt_insert_entries__cb+0x10/0x10 [i915] [86.878916] stop_machine+0x1c/0x50 [86.878927] bxt_vtd_ggtt_insert_entries__BKL+0x3b/0x60 [i915] [86.879652] intel_ggtt_bind_vma+0x43/0x70 [i915] [86.880375] __vma_bind+0x55/0x70 [i915] [86.881133] fence_work+0x26/0xa0 [i915] [86.881851] fence_notify+0xa1/0x140 [i915] [86.882566] __i915_sw_fence_complete+0x8f/0x270 [i915] [86.883286] i915_sw_fence_commit+0x39/0x60 [i915] [86.884003] i915_vma_pin_ww+0x462/0x1360 [i915] [86.884756] ? i915_vma_pin.constprop.0+0x6c/0x1d0 [i915] [86.885513] i915_vma_pin.constprop.0+0x133/0x1d0 [i915] [86.886281] initial_plane_vma+0x307/0x840 [i915] [86.887049] intel_initial_plane_config+0x33f/0x670 [i915] [86.887819] intel_display_driver_probe_nogem+0x1c6/0x260 [i915] [86.888587] i915_driver_probe+0x7fa/0xe80 [i915] [86.889293] ? mutex_unlock+0x12/0x20 [86.889301] ? drm_privacy_screen_get+0x171/0x190 [86.889308] ? acpi_dev_found+0x66/0x80 [86.889321] i915_pci_probe+0xe6/0x220 [i915] [86.890038] local_pci_probe+0x47/0xb0 [86.890049] pci_device_probe+0xf3/0x260 [86.890058] really_probe+0xf1/0x3c0 [86.890067] __driver_probe_device+0x8c/0x180 [86.890072] driver_probe_device+0x24/0xd0 [86.890078] __driver_attach+0x10f/0x220 [86.890083] ? __pfx___driver_attach+0x10/0x10 [86.890088] bus_for_each_dev+0x7f/0xe0 [86.890097] driver_attach+0x1e/0x30 [86.890101] bus_add_driver+0x151/0x290 [86.890107] driver_register+0x5e/0x130 [86.890113] __pci_register_driver+0x7d/0x90 [86.890119] i915_pci_register_driver+0x23/0x30 [i915] [86.890833] i915_init+0x37/0x120 [i915] [86.891482] ? __pfx_i915_init+0x10/0x10 [i915] [86.892135] do_one_initcall+0x60/0x3f0 [86.892145] ? __kmalloc_cache_noprof+0x33f/0x470 [86.892157] do_init_module+0x97/0x2a0 [86.892164] load_module+0x2c54/0x2d80 [86.892168] ? __kernel_read+0x15c/0x300 [86.892185] ? kernel_read_file+0x2b1/0x320 [86.892195] init_module_from_file+0x96/0xe0 [86.892199] ? init_module_from_file+0x96/0xe0 [86.892211] idempotent_init_module+0x117/0x330 [86.892224] __x64_sys_finit_module+0x77/0x100 [86.892230] x64_sys_call+0x24de/0x2660 [86.892236] do_syscall_64+0x91/0xe90 [86.892243] ? irqentry_exit+0x77/0xb0 [86.892249] ? sysvec_apic_timer_interrupt+0x57/0xc0 [86.892256] entry_SYSCALL_64_after_hwframe+0x76/0x7e [86.892261] RIP: 0033:0x7303e1b2725d [86.892271] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 8b bb 0d 00 f7 d8 64 89 01 48 [86.892276] RSP: 002b:00007ffddd1fdb38 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [86.892281] RAX: ffffffffffffffda RBX: 00005d771d88fd90 RCX: 00007303e1b2725d [86.892285] RDX: 0000000000000000 RSI: 00005d771d893aa0 RDI: 000000000000000c [86.892287] RBP: 00007ffddd1fdbf0 R08: 0000000000000040 R09: 00007ffddd1fdb80 [86.892289] R10: 00007303e1c03b20 R11: 0000000000000246 R12: 00005d771d893aa0 [86.892292] R13: 0000000000000000 R14: 00005d771d88f0d0 R15: 00005d771d895710 [86.892304] Call asynchronous variant of dma_fence_work_commit() in that case. v3: Provide more verbose in-line comment (Andi), - mention target environments in commit message. Fixes: 7d1c2618eac59 ("drm/i915: Take reservation lock around i915_vma_pin.") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14985 Cc: Andi Shyti Signed-off-by: Janusz Krzysztofik Reviewed-by: Sebastian Brzezinka Reviewed-by: Krzysztof Karas Acked-by: Andi Shyti Signed-off-by: Andi Shyti Link: https://lore.kernel.org/r/20251023082925.351307-6-janusz.krzysztofik@linux.intel.com (cherry picked from commit 648ef1324add1c2e2b6041cdf0b28d31fbca5f13) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_vma.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 25e97031d76e..30d5889fc809 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1595,8 +1595,20 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww, err_vma_res: i915_vma_resource_free(vma_res); err_fence: - if (work) - dma_fence_work_commit_imm(&work->base); + if (work) { + /* + * When pinning VMA to GGTT on CHV or BXT with VTD enabled, + * commit VMA binding asynchronously to avoid risk of lock + * inversion among reservation_ww locks held here and + * cpu_hotplug_lock acquired from stop_machine(), which we + * wrap around GGTT updates when running in those environments. + */ + if (i915_vma_is_ggtt(vma) && + intel_vm_no_concurrent_access_wa(vma->vm->i915)) + dma_fence_work_commit(&work->base); + else + dma_fence_work_commit_imm(&work->base); + } err_rpm: intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); From 7d44ad6b43d0be43d080180413a1b6c24cfbd266 Mon Sep 17 00:00:00 2001 From: Umesh Nerlige Ramappa Date: Wed, 15 Oct 2025 17:03:51 -0700 Subject: [PATCH 0650/1075] drm/i915: Fix conversion between clock ticks and nanoseconds When tick values are large, the multiplication by NSEC_PER_SEC is larger than 64 bits and results in bad conversions. The issue is seen in PMU busyness counters that look like they have wrapped around due to bad conversion. i915 PMU implementation returns monotonically increasing counters. If a count is lesser than previous one, it will only return the larger value until the smaller value catches up. The user will see this as zero delta between two measurements even though the engines are busy. Fix it by using mul_u64_u32_div() Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to pmu") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14955 Signed-off-by: Umesh Nerlige Ramappa Reviewed-by: Ashutosh Dixit Link: https://lore.kernel.org/r/20251016000350.1152382-2-umesh.nerlige.ramappa@intel.com (cherry picked from commit 2ada9cb1df3f5405a01d013b708b1b0914efccfe) Signed-off-by: Rodrigo Vivi [Rodrigo: Added the Fixes tag while cherry-picking to fixes] --- drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 88b147fa5cb1..c90b35881a26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -205,7 +205,7 @@ static u64 div_u64_roundup(u64 nom, u32 den) u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) { - return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency); + return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency); } u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) @@ -215,7 +215,7 @@ u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) { - return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC); + return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC); } u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) From 1642fabff19f768e9a8fea85d55a2d6d43ecffe3 Mon Sep 17 00:00:00 2001 From: Chu Guangqing Date: Fri, 31 Oct 2025 13:52:40 +0800 Subject: [PATCH 0651/1075] ACPI: CPPC: Fix typo in a comment Fix spelling from "pachage" to "package". Signed-off-by: Chu Guangqing [ rjw: Changelog and subject edits ] Link: https://patch.msgid.link/20251031055240.2791-1-chuguangqing@inspur.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index ab4651205e8a..6c684e54fe01 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -750,7 +750,7 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) } /* - * Disregard _CPC if the number of entries in the return pachage is not + * Disregard _CPC if the number of entries in the return package is not * as expected, but support future revisions being proper supersets of * the v3 and only causing more entries to be returned by _CPC. */ From fde7f626423295eeebcd0305c3ee4b7f2f45b655 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 11:45:29 -0300 Subject: [PATCH 0652/1075] MAINTAINERS: Add James Clark as a perf tools reviewer James Clark has been actively reviewing patches and contributing to perf tools. Reflect this by adding him as a reviewer in the MAINTAINERS file. Reviewed-by: James Clark Acked-by: Ian Rogers Acked-by: Ingo Molnar Acked-by: Namhyung Kim Acked-by: Peter Zijlstra (Intel) Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f6cda468095d..13a80d4a8b6b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -20157,6 +20157,7 @@ R: Alexander Shishkin R: Jiri Olsa R: Ian Rogers R: Adrian Hunter +R: James Clark L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported From 01e11d18da818eabdb3769081e7123ed8bfe863c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 31 Oct 2025 16:03:43 +0300 Subject: [PATCH 0653/1075] ACPI: SBS: Fix present test in acpi_battery_read() The battery->present variable is a 1 bit bitfield in a u8. This means that the "state & (1 << battery->id)" test will only work when "battery->id" is zero, otherwise ->present is zero. Fix this by adding a !!. Fixes: db1c291af7ad ("ACPI: SBS: Make SBS reads table-driven.") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQSzr4NynN2mpEvG@stanley.mountain Signed-off-by: Rafael J. Wysocki --- drivers/acpi/sbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index a3f95a3fffde..d3edc3bcbf01 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -487,7 +487,7 @@ static int acpi_battery_read(struct acpi_battery *battery) if (result) return result; - battery->present = state & (1 << battery->id); + battery->present = !!(state & (1 << battery->id)); if (!battery->present) return 0; From b1d46bc10ff2e6241f331e2935acd05d3d025816 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 12:10:17 -0300 Subject: [PATCH 0654/1075] tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources To pick the changes in: fddd07626baa419c ("KVM: x86: Define AMD's #HV, #VC, and #SX exception vectors") f2f5519aa4e3ec4e ("KVM: x86: Define Control Protection Exception (#CP) vector") 9d6812d415358372 ("KVM: x86: Enable guest SSP read/write interface with new uAPIs") 06f2969c6a1237f0 ("KVM: x86: Introduce KVM_{G,S}ET_ONE_REG uAPIs support") That just rebuilds kvm-stat.c on x86, no change in functionality. This silences these perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Sean Christopherson Cc: Yang Weijiang Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0f15d683817d..d420c9c066d4 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -35,6 +35,11 @@ #define MC_VECTOR 18 #define XM_VECTOR 19 #define VE_VECTOR 20 +#define CP_VECTOR 21 + +#define HV_VECTOR 28 +#define VC_VECTOR 29 +#define SX_VECTOR 30 /* Select x86 specific features in */ #define __KVM_HAVE_PIT @@ -411,6 +416,35 @@ struct kvm_xcrs { __u64 padding[16]; }; +#define KVM_X86_REG_TYPE_MSR 2 +#define KVM_X86_REG_TYPE_KVM 3 + +#define KVM_X86_KVM_REG_SIZE(reg) \ +({ \ + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ +}) + +#define KVM_X86_REG_TYPE_SIZE(type, reg) \ +({ \ + __u64 type_size = (__u64)type << 32; \ + \ + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ + type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \ + 0; \ + type_size; \ +}) + +#define KVM_X86_REG_ID(type, index) \ + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) + +#define KVM_X86_REG_MSR(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) +#define KVM_X86_REG_KVM(index) \ + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) + +/* KVM-defined registers starting from 0 */ +#define KVM_REG_GUEST_SSP 0 + #define KVM_SYNC_X86_REGS (1UL << 0) #define KVM_SYNC_X86_SREGS (1UL << 1) #define KVM_SYNC_X86_EVENTS (1UL << 2) From 649a0cc96ed1170cea5686f11229e07014b3d18f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 12:18:17 -0300 Subject: [PATCH 0655/1075] tools headers svm: Sync svm headers with the kernel sources To pick the changes in: b8c3c9f5d0505905 ("x86/apic: Initialize Secure AVIC APIC backing page") That triggers: CC /tmp/build/perf-tools/arch/x86/util/kvm-stat.o LD /tmp/build/perf-tools/arch/x86/util/perf-util-in.o LD /tmp/build/perf-tools/arch/x86/perf-util-in.o LD /tmp/build/perf-tools/arch/perf-util-in.o LD /tmp/build/perf-tools/perf-util-in.o AR /tmp/build/perf-tools/libperf-util.a LINK /tmp/build/perf-tools/perf But this time causes no changes in tooling results, as the introduced SVM_VMGEXIT_SAVIC exit reason wasn't added to SVM_EXIT_REASONS, that is used in kvm-stat.c. And addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h Please see tools/include/uapi/README for further details. Cc: Borislav Petkov (AMD) Cc: Neeraj Upadhyay Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/svm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 9c640a521a67..650e3256ea7d 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -118,6 +118,10 @@ #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 +#define SVM_VMGEXIT_SAVIC 0x8000001a +#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 +#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 +#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ From fc9ef9118d8a6c6250273aae14b48dd8f91bee1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 13:24:47 -0300 Subject: [PATCH 0656/1075] tools headers UAPI: Sync KVM's vmx.h header with the kernel sources to handle new exit reasons To pick the changes in: 885df2d2109a60f8 ("KVM: x86: Add support for RDMSR/WRMSRNS w/ immediate on Intel") c42856af8f70d983 ("KVM: TDX: Add a place holder for handler of TDX hypercalls (TDG.VP.VMCALL)") That makes 'perf kvm-stat' aware of these new TDCALL and MSR_{READ,WRITE}_IMM exit reasons, thus addressing the following perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Please see tools/include/uapi/README for further details. Cc: Sean Christopherson Cc: Xin Li Cc: Isaku Yamahata Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index f0f4a4cf84a7..9792e329343e 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -94,6 +94,8 @@ #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 #define EXIT_REASON_TDCALL 77 +#define EXIT_REASON_MSR_READ_IMM 84 +#define EXIT_REASON_MSR_WRITE_IMM 85 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -158,7 +160,9 @@ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ { EXIT_REASON_NOTIFY, "NOTIFY" }, \ - { EXIT_REASON_TDCALL, "TDCALL" } + { EXIT_REASON_TDCALL, "TDCALL" }, \ + { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \ + { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } From 549042f16716b4c72bfa9813d9e38f352c539dad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 13:35:06 -0300 Subject: [PATCH 0657/1075] tools headers asm: Sync fls headers header with the kernel sources To pick the changes in: 6606c8c7e8188656 ("bitops: Add __attribute_const__ to generic ffs()-family implementations") This addresses these tools build warnings: Warning: Kernel ABI header differences: diff -u tools/include/asm-generic/bitops/__fls.h include/asm-generic/bitops/__fls.h diff -u tools/include/asm-generic/bitops/fls.h include/asm-generic/bitops/fls.h diff -u tools/include/asm-generic/bitops/fls64.h include/asm-generic/bitops/fls64.h Please see tools/include/uapi/README for further details. Cc: Kees Cook Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/asm-generic/bitops/__fls.h | 2 +- tools/include/asm-generic/bitops/fls.h | 2 +- tools/include/asm-generic/bitops/fls64.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h index e974ec932ec1..35f33780ca6c 100644 --- a/tools/include/asm-generic/bitops/__fls.h +++ b/tools/include/asm-generic/bitops/__fls.h @@ -10,7 +10,7 @@ * * Undefined if no set bit exists, so code should check against 0 first. */ -static __always_inline unsigned int generic___fls(unsigned long word) +static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) { unsigned int num = BITS_PER_LONG - 1; diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h index 26f3ce1dd6e4..8eed3437edb9 100644 --- a/tools/include/asm-generic/bitops/fls.h +++ b/tools/include/asm-generic/bitops/fls.h @@ -10,7 +10,7 @@ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. */ -static __always_inline int generic_fls(unsigned int x) +static __always_inline __attribute_const__ int generic_fls(unsigned int x) { int r = 32; diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h index 866f2b2304ff..b5f58dd261a3 100644 --- a/tools/include/asm-generic/bitops/fls64.h +++ b/tools/include/asm-generic/bitops/fls64.h @@ -16,7 +16,7 @@ * at position 64. */ #if BITS_PER_LONG == 32 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { __u32 h = x >> 32; if (h) @@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x) return fls(x); } #elif BITS_PER_LONG == 64 -static __always_inline int fls64(__u64 x) +static __always_inline __attribute_const__ int fls64(__u64 x) { if (x == 0) return 0; From 6a0dddc53f6f04281b27254a7d940fb9bb2d5a3b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 3 Nov 2025 16:19:06 +0100 Subject: [PATCH 0658/1075] i2c: muxes: pca954x: Fix broken reset-gpio usage Revert commit 690de2902dca ("i2c: muxes: pca954x: Use reset controller only") and its dependent commit 94c296776403 ("i2c: muxes: pca954x: Reset if (de)select fails") because the first breaks all users of the driver, by requiring a completely optional reset-gpio driver. These commits cause that mux driver simply stops working when optional reset-gpio is not included, but that reset-gpio is not pulled anyhow. Driver cannot remove legacy reset-gpios handling. Fixes: 690de2902dca ("i2c: muxes: pca954x: Use reset controller only") Signed-off-by: Krzysztof Kozlowski Signed-off-by: Wolfram Sang --- drivers/i2c/muxes/i2c-mux-pca954x.c | 50 +++++++++++++---------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index 75c8d08fa24e..b9f370c9f018 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -118,6 +118,7 @@ struct pca954x { raw_spinlock_t lock; struct regulator *supply; + struct gpio_desc *reset_gpio; struct reset_control *reset_cont; }; @@ -315,25 +316,6 @@ static u8 pca954x_regval(struct pca954x *data, u8 chan) return 1 << chan; } -static void pca954x_reset_assert(struct pca954x *data) -{ - if (data->reset_cont) - reset_control_assert(data->reset_cont); -} - -static void pca954x_reset_deassert(struct pca954x *data) -{ - if (data->reset_cont) - reset_control_deassert(data->reset_cont); -} - -static void pca954x_reset_mux(struct pca954x *data) -{ - pca954x_reset_assert(data); - udelay(1); - pca954x_reset_deassert(data); -} - static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) { struct pca954x *data = i2c_mux_priv(muxc); @@ -347,8 +329,6 @@ static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) ret = pca954x_reg_write(muxc->parent, client, regval); data->last_chan = ret < 0 ? 0 : regval; } - if (ret == -ETIMEDOUT && data->reset_cont) - pca954x_reset_mux(data); return ret; } @@ -358,7 +338,6 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan) struct pca954x *data = i2c_mux_priv(muxc); struct i2c_client *client = data->client; s32 idle_state; - int ret = 0; idle_state = READ_ONCE(data->idle_state); if (idle_state >= 0) @@ -368,10 +347,8 @@ static int pca954x_deselect_mux(struct i2c_mux_core *muxc, u32 chan) if (idle_state == MUX_IDLE_DISCONNECT) { /* Deselect active channel */ data->last_chan = 0; - ret = pca954x_reg_write(muxc->parent, client, - data->last_chan); - if (ret == -ETIMEDOUT && data->reset_cont) - pca954x_reset_mux(data); + return pca954x_reg_write(muxc->parent, client, + data->last_chan); } /* otherwise leave as-is */ @@ -550,10 +527,29 @@ static int pca954x_get_reset(struct device *dev, struct pca954x *data) if (IS_ERR(data->reset_cont)) return dev_err_probe(dev, PTR_ERR(data->reset_cont), "Failed to get reset\n"); + else if (data->reset_cont) + return 0; + + /* + * fallback to legacy reset-gpios + */ + data->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(data->reset_gpio)) { + return dev_err_probe(dev, PTR_ERR(data->reset_gpio), + "Failed to get reset gpio"); + } return 0; } +static void pca954x_reset_deassert(struct pca954x *data) +{ + if (data->reset_cont) + reset_control_deassert(data->reset_cont); + else + gpiod_set_value_cansleep(data->reset_gpio, 0); +} + /* * I2C init/probing/exit functions */ @@ -593,7 +589,7 @@ static int pca954x_probe(struct i2c_client *client) if (ret) goto fail_cleanup; - if (data->reset_cont) { + if (data->reset_cont || data->reset_gpio) { udelay(1); pca954x_reset_deassert(data); /* Give the chip some time to recover. */ From 7f17ef0d47b9aa01b89a92b2514fbfe800d8aeca Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 Nov 2025 14:54:31 -0300 Subject: [PATCH 0659/1075] perf symbols: Handle '1' symbols in /proc/kallsyms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I started seeing this in recent Fedora 42 kernels: root@x1:~# uname -a Linux x1 6.17.4-200.fc42.x86_64 #1 SMP PREEMPT_DYNAMIC Sun Oct 19 18:47:49 UTC 2025 x86_64 GNU/Linux root@x1:~# root@x1:~# perf test 1 1: vmlinux symtab matches kallsyms : FAILED! root@x1:~# Related to: root@x1:~# grep ' 1 ' /proc/kallsyms ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ root@x1:~# That is found in: root@x1:~# pahole --running_kernel_vmlinux /usr/lib/debug/lib/modules/6.17.4-200.fc42.x86_64/vmlinux root@x1:~# root@x1:~# readelf -sW /usr/lib/debug/lib/modules/6.17.4-200.fc42.x86_64/vmlinux | grep __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ 150649: ffffffff81f8bc00 16 FUNC LOCAL DEFAULT 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ root@x1:~# But was being filtered out when reading /proc/kallsyms, as the '1' symbol type was not being handled, do it, there are just two of them at this point. Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: Björn Roy Baron Cc: Boqun Feng Cc: Danilo Krummrich Cc: Gary Guo Cc: Miguel Ojeda Cc: Trevor Gross Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc26b7bf302b..948d3e8ad782 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -112,9 +112,13 @@ static bool symbol_type__filter(char __symbol_type) // 'N' first seen in: // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ // a seemingly Rust mangled name + // Ditto for '1': + // root@x1:~# grep ' 1 ' /proc/kallsyms + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ char symbol_type = toupper(__symbol_type); return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || - __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N'; + __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1'; } static int prefix_underscores_count(const char *str) From 16c43a56b79e2c3220b043236369a129d508c65a Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Nov 2025 22:28:52 +0100 Subject: [PATCH 0660/1075] rust: kbuild: treat `build_error` and `rustdoc` as kernel objects Even if normally `build_error` isn't a kernel object, it should still be treated as such so that we pass the same flags. Similarly, `rustdoc` targets are never kernel objects, but we need to treat them as such. Otherwise, starting with Rust 1.91.0 (released 2025-10-30), `rustc` will complain about missing sanitizer flags since `-Zsanitizer` is a target modifier too [1]: error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `build_error` --> rust/build_error.rs:3:1 | 3 | //! Build-time error. | ^ | = help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core` = help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error Thus explicitly mark them as kernel objects. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/pull/138736 [1] Reviewed-by: Alice Ryhl Tested-by: Justin M. Forbes Link: https://patch.msgid.link/20251102212853.1505384-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/rust/Makefile b/rust/Makefile index 23c7ae905bd2..a9fb9354b659 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -127,9 +127,14 @@ rustdoc-core: private rustc_target_flags = --edition=$(core-edition) $(core-cfgs rustdoc-core: $(RUST_LIB_SRC)/core/src/lib.rs rustdoc-clean FORCE +$(call if_changed,rustdoc) +# Even if `rustdoc` targets are not kernel objects, they should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustdoc` will complain about missing sanitizer flags causing an ABI mismatch. +rustdoc-compiler_builtins: private is-kernel-object := y rustdoc-compiler_builtins: $(src)/compiler_builtins.rs rustdoc-core FORCE +$(call if_changed,rustdoc) +rustdoc-ffi: private is-kernel-object := y rustdoc-ffi: $(src)/ffi.rs rustdoc-core FORCE +$(call if_changed,rustdoc) @@ -147,6 +152,7 @@ rustdoc-pin_init: $(src)/pin-init/src/lib.rs rustdoc-pin_init_internal \ rustdoc-macros FORCE +$(call if_changed,rustdoc) +rustdoc-kernel: private is-kernel-object := y rustdoc-kernel: private rustc_target_flags = --extern ffi --extern pin_init \ --extern build_error --extern macros \ --extern bindings --extern uapi @@ -522,6 +528,10 @@ $(obj)/pin_init.o: $(src)/pin-init/src/lib.rs $(obj)/compiler_builtins.o \ $(obj)/$(libpin_init_internal_name) $(obj)/$(libmacros_name) FORCE +$(call if_changed_rule,rustc_library) +# Even if normally `build_error` is not a kernel object, it should still be +# treated as such so that we pass the same flags. Otherwise, for instance, +# `rustc` will complain about missing sanitizer flags causing an ABI mismatch. +$(obj)/build_error.o: private is-kernel-object := y $(obj)/build_error.o: private skip_gendwarfksyms = 1 $(obj)/build_error.o: $(src)/build_error.rs $(obj)/compiler_builtins.o FORCE +$(call if_changed_rule,rustc_library) From fad472efab0a805dd939f017c5b8669a786a4bcf Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sun, 2 Nov 2025 22:28:53 +0100 Subject: [PATCH 0661/1075] rust: kbuild: workaround `rustdoc` doctests modifier bug The `rustdoc` modifiers bug [1] was fixed in Rust 1.90.0 [2], for which we added a workaround in commit abbf9a449441 ("rust: workaround `rustdoc` target modifiers bug"). However, `rustdoc`'s doctest generation still has a similar issue [3], being fixed at [4], which does not affect us because we apply the workaround to both, and now, starting with Rust 1.91.0 (released 2025-10-30), `-Zsanitizer` is a target modifier too [5], which means we fail with: RUSTDOC TK rust/kernel/lib.rs error: mixing `-Zsanitizer` will cause an ABI mismatch in crate `kernel` --> rust/kernel/lib.rs:3:1 | 3 | //! The `kernel` crate. | ^ | = help: the `-Zsanitizer` flag modifies the ABI so Rust crates compiled with different values of this flag cannot be used together safely = note: unset `-Zsanitizer` in this crate is incompatible with `-Zsanitizer=kernel-address` in dependency `core` = help: set `-Zsanitizer=kernel-address` in this crate or unset `-Zsanitizer` in `core` = help: if you are sure this will not cause problems, you may use `-Cunsafe-allow-abi-mismatch=sanitizer` to silence this error A simple way around is to add the sanitizer to the list in the existing workaround (especially if we had not started to pass the sanitizer flags in the previous commit, since in that case that would not be necessary). However, that still applies the workaround in more cases than necessary. Instead, only modify the doctests flags to ignore the check for sanitizers, so that it is more local (and thus the compiler keeps checking it for us in the normal `rustdoc` calls). Since the previous commit already treated the `rustdoc` calls as kernel objects, this should allow us in the future to easily remove this workaround when the time comes. By the way, the `-Cunsafe-allow-abi-mismatch` flag overwrites previous ones rather than appending, so it needs to be all done in the same flag. Moreover, unknown modifiers are rejected, and thus we have to gate based on the version too. Finally, `-Zsanitizer-cfi-normalize-integers` is not affected (in Rust 1.91.0), so it is not needed in the workaround for the moment. Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). Link: https://github.com/rust-lang/rust/issues/144521 [1] Link: https://github.com/rust-lang/rust/pull/144523 [2] Link: https://github.com/rust-lang/rust/issues/146465 [3] Link: https://github.com/rust-lang/rust/pull/148068 [4] Link: https://github.com/rust-lang/rust/pull/138736 [5] Reviewed-by: Alice Ryhl Tested-by: Justin M. Forbes Link: https://patch.msgid.link/20251102212853.1505384-2-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- rust/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index a9fb9354b659..3e545c1a0ff4 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -69,6 +69,9 @@ core-edition := $(if $(call rustc-min-version,108700),2024,2021) # the time being (https://github.com/rust-lang/rust/issues/144521). rustdoc_modifiers_workaround := $(if $(call rustc-min-version,108800),-Cunsafe-allow-abi-mismatch=fixed-x18) +# Similarly, for doctests (https://github.com/rust-lang/rust/issues/146465). +doctests_modifiers_workaround := $(rustdoc_modifiers_workaround)$(if $(call rustc-min-version,109100),$(comma)sanitizer) + # `rustc` recognizes `--remap-path-prefix` since 1.26.0, but `rustdoc` only # since Rust 1.81.0. Moreover, `rustdoc` ICEs on out-of-tree builds since Rust # 1.82.0 (https://github.com/rust-lang/rust/issues/138520). Thus workaround both @@ -236,7 +239,7 @@ quiet_cmd_rustdoc_test_kernel = RUSTDOC TK $< --extern bindings --extern uapi \ --no-run --crate-name kernel -Zunstable-options \ --sysroot=/dev/null \ - $(rustdoc_modifiers_workaround) \ + $(doctests_modifiers_workaround) \ --test-builder $(objtree)/scripts/rustdoc_test_builder \ $< $(rustdoc_test_kernel_quiet); \ $(objtree)/scripts/rustdoc_test_gen From b2b526c2cf57d14ee269e012ed179081871f45a1 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 31 Oct 2025 09:15:53 -0700 Subject: [PATCH 0662/1075] net: mdio: Check regmap pointer returned by device_node_to_regmap() The call to device_node_to_regmap() in airoha_mdio_probe() can return an ERR_PTR() if regmap initialization fails. Currently, the driver stores the pointer without validation, which could lead to a crash if it is later dereferenced. Add an IS_ERR() check and return the corresponding error code to make the probe path more robust. Fixes: 67e3ba978361 ("net: mdio: Add MDIO bus controller for Airoha AN7583") Signed-off-by: Alok Tiwari Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251031161607.58581-1-alok.a.tiwari@oracle.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-airoha.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/mdio/mdio-airoha.c b/drivers/net/mdio/mdio-airoha.c index 1dc9939c8d7d..52e7475121ea 100644 --- a/drivers/net/mdio/mdio-airoha.c +++ b/drivers/net/mdio/mdio-airoha.c @@ -219,6 +219,8 @@ static int airoha_mdio_probe(struct platform_device *pdev) priv = bus->priv; priv->base_addr = addr; priv->regmap = device_node_to_regmap(dev->parent->of_node); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); priv->clk = devm_clk_get_enabled(dev, NULL); if (IS_ERR(priv->clk)) From 7ed8b63ddc9a9578eae81f4da32761568a25efad Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 11:39:54 +0100 Subject: [PATCH 0663/1075] MAINTAINERS: add brcm tag driver to b53 The b53 entry was missing the brcm tag driver, so add it. Reported-by: Jakub Kicinski Link: https://lore.kernel.org/netdev/20251029181216.3f35f8ba@kernel.org/ Signed-off-by: Jonas Gorski Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101103954.29816-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0518f1f4f3b5..92e9cd1a363b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4818,6 +4818,7 @@ F: drivers/net/dsa/b53/* F: drivers/net/dsa/bcm_sf2* F: include/linux/dsa/brcm.h F: include/linux/platform_data/b53.h +F: net/dsa/tag_brcm.c BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE M: Florian Fainelli From b6a8a5477fe9bd6be2b594a88f82f8bba41e6d54 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:06 +0100 Subject: [PATCH 0664/1075] net: dsa: b53: fix resetting speed and pause on forced link There is no guarantee that the port state override registers have their default values, as not all switches support being reset via register or have a reset GPIO. So when forcing port config, we need to make sure to clear all fields, which we currently do not do for the speed and flow control configuration. This can cause flow control stay enabled, or in the case of speed becoming an illegal value, e.g. configured for 1G (0x2), then setting 100M (0x1), results in 0x3 which is invalid. For PORT_OVERRIDE_SPEED_2000M we need to make sure to only clear it on supported chips, as the bit can have different meanings on other chips, e.g. for BCM5389 this controls scanning PHYs for link/speed configuration. Fixes: 5e004460f874 ("net: dsa: b53: Add helper to set link parameters") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 2f846381d5a7..cb28256ef3cc 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1372,6 +1372,10 @@ static void b53_force_port_config(struct b53_device *dev, int port, else reg &= ~PORT_OVERRIDE_FULL_DUPLEX; + reg &= ~(0x3 << GMII_PO_SPEED_S); + if (is5301x(dev) || is58xx(dev)) + reg &= ~PORT_OVERRIDE_SPEED_2000M; + switch (speed) { case 2000: reg |= PORT_OVERRIDE_SPEED_2000M; @@ -1390,6 +1394,11 @@ static void b53_force_port_config(struct b53_device *dev, int port, return; } + if (is5325(dev)) + reg &= ~PORT_OVERRIDE_LP_FLOW_25; + else + reg &= ~(PORT_OVERRIDE_RX_FLOW | PORT_OVERRIDE_TX_FLOW); + if (rx_pause) { if (is5325(dev)) reg |= PORT_OVERRIDE_LP_FLOW_25; From 3e4ebdc1606adf77744cf8ed7a433d279fdc57ba Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 1 Nov 2025 14:28:07 +0100 Subject: [PATCH 0665/1075] net: dsa: b53: fix bcm63xx RGMII port link adjustment BCM63XX's switch does not support MDIO scanning of external phys, so its MACs needs to be manually configured for autonegotiated link speeds. So b53_force_port_config() and b53_force_link() accordingly also when mode is MLO_AN_PHY for those ports. Fixes lower speeds than 1000/full on rgmii ports 4 - 7. This aligns the behaviour with the old bcm63xx_enetsw driver for those ports. Fixes: 967dd82ffc52 ("net: dsa: b53: Add support for Broadcom RoboSwitch") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251101132807.50419-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index cb28256ef3cc..bb2c6dfa7835 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1602,8 +1602,11 @@ static void b53_phylink_mac_link_down(struct phylink_config *config, struct b53_device *dev = dp->ds->priv; int port = dp->index; - if (mode == MLO_AN_PHY) + if (mode == MLO_AN_PHY) { + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) + b53_force_link(dev, port, false); return; + } if (mode == MLO_AN_FIXED) { b53_force_link(dev, port, false); @@ -1631,6 +1634,13 @@ static void b53_phylink_mac_link_up(struct phylink_config *config, if (mode == MLO_AN_PHY) { /* Re-negotiate EEE if it was enabled already */ p->eee_enabled = b53_eee_init(ds, port, phydev); + + if (is63xx(dev) && in_range(port, B53_63XX_RGMII0, 4)) { + b53_force_port_config(dev, port, speed, duplex, + tx_pause, rx_pause); + b53_force_link(dev, port, true); + } + return; } From c264294624e956a967a9e2e5fa41e3273340b089 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:56 +0100 Subject: [PATCH 0666/1075] net: dsa: b53: fix enabling ip multicast In the New Control register bit 1 is either reserved, or has a different function: Out of Range Error Discard When enabled, the ingress port discards any frames if the Length field is between 1500 and 1536 (excluding 1500 and 1536) and with good CRC. The actual bit for enabling IP multicast is bit 0, which was only explicitly enabled for BCM5325 so far. For older switch chips, this bit defaults to 0, so we want to enable it as well, while newer switch chips default to 1, and their documentation says "It is illegal to set this bit to zero." So drop the wrong B53_IPMC_FWD_EN define, enable the IP multicast bit also for other switch chips. While at it, rename it to (B53_)IP_MC as that is how it is called in Broadcom code. Fixes: 63cc54a6f073 ("net: dsa: b53: Fix egress flooding settings") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-2-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 4 ++-- drivers/net/dsa/b53/b53_regs.h | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bb2c6dfa7835..58c31049c0e7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -371,11 +371,11 @@ static void b53_set_forwarding(struct b53_device *dev, int enable) * frames should be flooded or not. */ b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IPMC_FWD_EN; + mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } else { b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt); - mgmt |= B53_IP_MCAST_25; + mgmt |= B53_IP_MC; b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); } } diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 309fe0e46dad..8ce1ce72e938 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -111,8 +111,7 @@ /* IP Multicast control (8 bit) */ #define B53_IP_MULTICAST_CTRL 0x21 -#define B53_IP_MCAST_25 BIT(0) -#define B53_IPMC_FWD_EN BIT(1) +#define B53_IP_MC BIT(0) #define B53_UC_FWD_EN BIT(6) #define B53_MC_FWD_EN BIT(7) From 0be04b5fa62a82a9929ca261f6c9f64a3d0a28da Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:57 +0100 Subject: [PATCH 0667/1075] net: dsa: b53: stop reading ARL entries if search is done The switch clears the ARL_SRCH_STDN bit when the search is done, i.e. it finished traversing the ARL table. This means that there will be no valid result, so we should not attempt to read and process any further entries. We only ever check the validity of the entries for 4 ARL bin chips, and only after having passed the first entry to the b53_fdb_copy(). This means that we always pass an invalid entry at the end to the b53_fdb_copy(). b53_fdb_copy() does check the validity though before passing on the entry, so it never gets passed on. On < 4 ARL bin chips, we will even continue reading invalid entries until we reach the result limit. Fixes: 1da6df85c6fb ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-3-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 58c31049c0e7..b467500699c7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2037,7 +2037,7 @@ static int b53_arl_search_wait(struct b53_device *dev) do { b53_read8(dev, B53_ARLIO_PAGE, offset, ®); if (!(reg & ARL_SRCH_STDN)) - return 0; + return -ENOENT; if (reg & ARL_SRCH_VLID) return 0; From e57723fe536f040cc2635ec1545dd0a7919a321e Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 2 Nov 2025 11:07:58 +0100 Subject: [PATCH 0668/1075] net: dsa: b53: properly bound ARL searches for < 4 ARL bin chips When iterating over the ARL table we stop at max ARL entries / 2, but this is only valid if the chip actually returns 2 results at once. For chips with only one result register we will stop before reaching the end of the table if it is more than half full. Fix this by only dividing the maximum results by two if we have a chip with more than one result register (i.e. those with 4 ARL bins). Fixes: cd169d799bee ("net: dsa: b53: Bound check ARL searches") Signed-off-by: Jonas Gorski Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251102100758.28352-4-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b467500699c7..eb767edc4c13 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2087,13 +2087,16 @@ static int b53_fdb_copy(int port, const struct b53_arl_entry *ent, int b53_fdb_dump(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data) { + unsigned int count = 0, results_per_hit = 1; struct b53_device *priv = ds->priv; struct b53_arl_entry results[2]; - unsigned int count = 0; u8 offset; int ret; u8 reg; + if (priv->num_arl_bins > 2) + results_per_hit = 2; + mutex_lock(&priv->arl_mutex); if (is5325(priv) || is5365(priv)) @@ -2115,7 +2118,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, if (ret) break; - if (priv->num_arl_bins > 2) { + if (results_per_hit == 2) { b53_arl_search_rd(priv, 1, &results[1]); ret = b53_fdb_copy(port, &results[1], cb, data); if (ret) @@ -2125,7 +2128,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port, break; } - } while (count++ < b53_max_arl_entries(priv) / 2); + } while (count++ < b53_max_arl_entries(priv) / results_per_hit); mutex_unlock(&priv->arl_mutex); From c8732e933925e188cbb1d9bc3a5e1ae9affe6869 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:28 +0100 Subject: [PATCH 0669/1075] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 2 ("Analog front-end not optimized for PHY-side shorted center taps"). [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 147 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 604b5de0c158..1fa56d4c1793 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -466,6 +466,12 @@ struct lan8842_priv { u16 rev; }; +struct lanphy_reg_data { + int page; + u16 addr; + u16 val; +}; + static const struct kszphy_type lan8814_type = { .led_mode_reg = ~LAN8814_LED_CTRL_1, .cable_diag_reg = LAN8814_CABLE_DIAG, @@ -2835,6 +2841,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PCS_DIGITAL 2 +/** + * LAN8814_PAGE_EEE - Selects Extended Page 3. + * + * This page contains EEE registers + */ +#define LAN8814_PAGE_EEE 3 + /** * LAN8814_PAGE_COMMON_REGS - Selects Extended Page 4. * @@ -2853,6 +2866,13 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev, */ #define LAN8814_PAGE_PORT_REGS 5 +/** + * LAN8814_PAGE_POWER_REGS - Selects Extended Page 28. + * + * This page contains analog control registers and power mode registers. + */ +#define LAN8814_PAGE_POWER_REGS 28 + /** * LAN8814_PAGE_SYSTEM_CTRL - Selects Extended Page 31. * @@ -5884,6 +5904,128 @@ static int lan8842_probe(struct phy_device *phydev) return 0; } +#define LAN8814_POWER_MGMT_MODE_3_ANEG_MDI 0x13 +#define LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX 0x14 +#define LAN8814_POWER_MGMT_MODE_5_10BT_MDI 0x15 +#define LAN8814_POWER_MGMT_MODE_6_10BT_MDIX 0x16 +#define LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN 0x17 +#define LAN8814_POWER_MGMT_MODE_8_100BT_MDI 0x18 +#define LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX 0x19 +#define LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX 0x1a +#define LAN8814_POWER_MGMT_MODE_11_100BT_MDIX 0x1b +#define LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX 0x1c +#define LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX 0x1d +#define LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX 0x1e + +#define LAN8814_POWER_MGMT_DLLPD_D BIT(0) +#define LAN8814_POWER_MGMT_ADCPD_D BIT(1) +#define LAN8814_POWER_MGMT_PGAPD_D BIT(2) +#define LAN8814_POWER_MGMT_TXPD_D BIT(3) +#define LAN8814_POWER_MGMT_DLLPD_C BIT(4) +#define LAN8814_POWER_MGMT_ADCPD_C BIT(5) +#define LAN8814_POWER_MGMT_PGAPD_C BIT(6) +#define LAN8814_POWER_MGMT_TXPD_C BIT(7) +#define LAN8814_POWER_MGMT_DLLPD_B BIT(8) +#define LAN8814_POWER_MGMT_ADCPD_B BIT(9) +#define LAN8814_POWER_MGMT_PGAPD_B BIT(10) +#define LAN8814_POWER_MGMT_TXPD_B BIT(11) +#define LAN8814_POWER_MGMT_DLLPD_A BIT(12) +#define LAN8814_POWER_MGMT_ADCPD_A BIT(13) +#define LAN8814_POWER_MGMT_PGAPD_A BIT(14) +#define LAN8814_POWER_MGMT_TXPD_A BIT(15) + +#define LAN8814_POWER_MGMT_C_D (LAN8814_POWER_MGMT_DLLPD_D | \ + LAN8814_POWER_MGMT_ADCPD_D | \ + LAN8814_POWER_MGMT_PGAPD_D | \ + LAN8814_POWER_MGMT_DLLPD_C | \ + LAN8814_POWER_MGMT_ADCPD_C | \ + LAN8814_POWER_MGMT_PGAPD_C) + +#define LAN8814_POWER_MGMT_B_C_D (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B) + +#define LAN8814_POWER_MGMT_VAL1 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_B | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL2 LAN8814_POWER_MGMT_C_D + +#define LAN8814_POWER_MGMT_VAL3 (LAN8814_POWER_MGMT_C_D | \ + LAN8814_POWER_MGMT_DLLPD_B | \ + LAN8814_POWER_MGMT_ADCPD_B | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL4 (LAN8814_POWER_MGMT_B_C_D | \ + LAN8814_POWER_MGMT_ADCPD_A | \ + LAN8814_POWER_MGMT_PGAPD_A) + +#define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D + +static const struct lanphy_reg_data short_center_tap_errata[] = { + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_4_ANEG_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_5_10BT_MDI, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_6_10BT_MDIX, + LAN8814_POWER_MGMT_VAL1 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_7_100BT_TRAIN, + LAN8814_POWER_MGMT_VAL2 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_8_100BT_MDI, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_9_100BT_EEE_MDI_TX, + LAN8814_POWER_MGMT_VAL3 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_10_100BT_EEE_MDI_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_11_100BT_MDIX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_12_100BT_EEE_MDIX_TX, + LAN8814_POWER_MGMT_VAL5 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_13_100BT_EEE_MDIX_RX, + LAN8814_POWER_MGMT_VAL4 }, + { LAN8814_PAGE_POWER_REGS, + LAN8814_POWER_MGMT_MODE_14_100BTX_EEE_TX_RX, + LAN8814_POWER_MGMT_VAL4 }, +}; + +static int lanphy_write_reg_data(struct phy_device *phydev, + const struct lanphy_reg_data *data, + size_t num) +{ + int ret = 0; + + while (num--) { + ret = lanphy_write_page_reg(phydev, data->page, data->addr, + data->val); + if (ret) + break; + } + + return ret; +} + +static int lan8842_erratas(struct phy_device *phydev) +{ + return lanphy_write_reg_data(phydev, short_center_tap_errata, + ARRAY_SIZE(short_center_tap_errata)); +} + static int lan8842_config_init(struct phy_device *phydev) { int ret; @@ -5896,6 +6038,11 @@ static int lan8842_config_init(struct phy_device *phydev) if (ret < 0) return ret; + /* Apply the erratas for this device */ + ret = lan8842_erratas(phydev); + if (ret < 0) + return ret; + /* Even if the GPIOs are set to control the LEDs the behaviour of the * LEDs is wrong, they are not blinking when there is traffic. * To fix this it is required to set extended LED mode From 65bd9a262644b67490dddd93a2b842ac94ccbe36 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Fri, 31 Oct 2025 13:16:29 +0100 Subject: [PATCH 0670/1075] net: phy: micrel: lan8842 errata Add errata for lan8842. The errata document can be found here [1]. This is fixing the module 7 ("1000BASE-T PMA EEE TX wake timer is non-compliant") [1] https://ww1.microchip.com/downloads/aemDocuments/documents/UNG/ProductDocuments/Errata/LAN8842-Errata-DS80001172.pdf Fixes: 5a774b64cd6a ("net: phy: micrel: Add support for lan8842") Reviewed-by: Andrew Lunn Signed-off-by: Horatiu Vultur Reviewed-by: Russell King (Oracle) Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1fa56d4c1793..6a1a424e3b30 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -5965,6 +5965,9 @@ static int lan8842_probe(struct phy_device *phydev) #define LAN8814_POWER_MGMT_VAL5 LAN8814_POWER_MGMT_B_C_D +#define LAN8814_EEE_WAKE_TX_TIMER 0x0e +#define LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL 0x1f + static const struct lanphy_reg_data short_center_tap_errata[] = { { LAN8814_PAGE_POWER_REGS, LAN8814_POWER_MGMT_MODE_3_ANEG_MDI, @@ -6004,6 +6007,12 @@ static const struct lanphy_reg_data short_center_tap_errata[] = { LAN8814_POWER_MGMT_VAL4 }, }; +static const struct lanphy_reg_data waketx_timer_errata[] = { + { LAN8814_PAGE_EEE, + LAN8814_EEE_WAKE_TX_TIMER, + LAN8814_EEE_WAKE_TX_TIMER_MAX_VAL }, +}; + static int lanphy_write_reg_data(struct phy_device *phydev, const struct lanphy_reg_data *data, size_t num) @@ -6022,8 +6031,15 @@ static int lanphy_write_reg_data(struct phy_device *phydev, static int lan8842_erratas(struct phy_device *phydev) { - return lanphy_write_reg_data(phydev, short_center_tap_errata, + int ret; + + ret = lanphy_write_reg_data(phydev, short_center_tap_errata, ARRAY_SIZE(short_center_tap_errata)); + if (ret) + return ret; + + return lanphy_write_reg_data(phydev, waketx_timer_errata, + ARRAY_SIZE(waketx_timer_errata)); } static int lan8842_config_init(struct phy_device *phydev) From 38f50242bf0f237cdc262308d624d333286ec3c5 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:26 +0100 Subject: [PATCH 0671/1075] sctp: Hold RCU read lock while iterating over address list With CONFIG_PROVE_RCU_LIST=y and by executing $ netcat -l --sctp & $ netcat --sctp localhost & $ ss --sctp one can trigger the following Lockdep-RCU splat(s): WARNING: suspicious RCU usage 6.18.0-rc1-00093-g7f864458e9a6 #5 Not tainted ----------------------------- net/sctp/diag.c:76 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by ss/215: #0: ffff9c740828bec0 (nlk_cb_mutex-SOCK_DIAG){+.+.}-{4:4}, at: __netlink_dump_start+0x84/0x2b0 #1: ffff9c7401d72cd0 (sk_lock-AF_INET6){+.+.}-{0:0}, at: sctp_sock_dump+0x38/0x200 stack backtrace: CPU: 0 UID: 0 PID: 215 Comm: ss Not tainted 6.18.0-rc1-00093-g7f864458e9a6 #5 PREEMPT(voluntary) Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x5d/0x90 lockdep_rcu_suspicious.cold+0x4e/0xa3 inet_sctp_diag_fill.isra.0+0x4b1/0x5d0 sctp_sock_dump+0x131/0x200 sctp_transport_traverse_process+0x170/0x1b0 ? __pfx_sctp_sock_filter+0x10/0x10 ? __pfx_sctp_sock_dump+0x10/0x10 sctp_diag_dump+0x103/0x140 __inet_diag_dump+0x70/0xb0 netlink_dump+0x148/0x490 __netlink_dump_start+0x1f3/0x2b0 inet_diag_handler_cmd+0xcd/0x100 ? __pfx_inet_diag_dump_start+0x10/0x10 ? __pfx_inet_diag_dump+0x10/0x10 ? __pfx_inet_diag_dump_done+0x10/0x10 sock_diag_rcv_msg+0x18e/0x320 ? __pfx_sock_diag_rcv_msg+0x10/0x10 netlink_rcv_skb+0x4d/0x100 netlink_unicast+0x1d7/0x2b0 netlink_sendmsg+0x203/0x450 ____sys_sendmsg+0x30c/0x340 ___sys_sendmsg+0x94/0xf0 __sys_sendmsg+0x83/0xf0 do_syscall_64+0xbb/0x390 entry_SYSCALL_64_after_hwframe+0x77/0x7f ... Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-2-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 996c2018f0e6..1a8761f87bf1 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -73,19 +73,23 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, struct nlattr *attr; void *info = NULL; + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) addrcnt++; + rcu_read_unlock(); attr = nla_reserve(skb, INET_DIAG_LOCALS, addrlen * addrcnt); if (!attr) return -EMSGSIZE; info = nla_data(attr); + rcu_read_lock(); list_for_each_entry_rcu(laddr, address_list, list) { memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } + rcu_read_unlock(); return 0; } From 95aef86ab231f047bb8085c70666059b58f53c09 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:27 +0100 Subject: [PATCH 0672/1075] sctp: Prevent TOCTOU out-of-bounds write For the following path not holding the sock lock, sctp_diag_dump() -> sctp_for_each_endpoint() -> sctp_ep_dump() make sure not to exceed bounds in case the address list has grown between buffer allocation (time-of-check) and write (time-of-use). Suggested-by: Kuniyuki Iwashima Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Reviewed-by: Kuniyuki Iwashima Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-3-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 1a8761f87bf1..5d64dd99ca9a 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -88,6 +88,9 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, memcpy(info, &laddr->a, sizeof(laddr->a)); memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; + + if (!--addrcnt) + break; } rcu_read_unlock(); From f1fc201148c7e684c10a72b6a3375597f28d1ef6 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Tue, 28 Oct 2025 17:12:28 +0100 Subject: [PATCH 0673/1075] sctp: Hold sock lock while iterating over address list Move address list traversal in inet_assoc_attr_size() under the sock lock to avoid holding the RCU read lock. Suggested-by: Xin Long Fixes: 8f840e47f190 ("sctp: add the sctp_diag.c file") Signed-off-by: Stefan Wiehler Acked-by: Xin Long Link: https://patch.msgid.link/20251028161506.3294376-4-stefan.wiehler@nokia.com Signed-off-by: Jakub Kicinski --- net/sctp/diag.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 5d64dd99ca9a..2afb376299fe 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -230,14 +230,15 @@ struct sctp_comm_param { bool net_admin; }; -static size_t inet_assoc_attr_size(struct sctp_association *asoc) +static size_t inet_assoc_attr_size(struct sock *sk, + struct sctp_association *asoc) { int addrlen = sizeof(struct sockaddr_storage); int addrcnt = 0; struct sctp_sockaddr_entry *laddr; list_for_each_entry_rcu(laddr, &asoc->base.bind_addr.address_list, - list) + list, lockdep_sock_is_held(sk)) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) @@ -263,11 +264,14 @@ static int sctp_sock_dump_one(struct sctp_endpoint *ep, struct sctp_transport *t if (err) return err; - rep = nlmsg_new(inet_assoc_attr_size(assoc), GFP_KERNEL); - if (!rep) - return -ENOMEM; - lock_sock(sk); + + rep = nlmsg_new(inet_assoc_attr_size(sk, assoc), GFP_KERNEL); + if (!rep) { + release_sock(sk); + return -ENOMEM; + } + if (ep != assoc->ep) { err = -EAGAIN; goto out; From d261f5b09c28850dc63ca1d3018596f829f402d5 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:02 +0200 Subject: [PATCH 0674/1075] net: ionic: add dma_wmb() before ringing TX doorbell The TX path currently writes descriptors and then immediately writes to the MMIO doorbell register to notify the NIC. On weakly ordered architectures, descriptor writes may still be pending in CPU or DMA write buffers when the doorbell is issued, leading to the device fetching stale or incomplete descriptors. Add a dma_wmb() in ionic_txq_post() to ensure all descriptor writes are visible to the device before the doorbell MMIO write. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Link: https://patch.msgid.link/20251031155203.203031-1-mheib@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_txrx.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index d10b58ebf603..2e571d0a0d8a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -29,6 +29,10 @@ static void ionic_tx_clean(struct ionic_queue *q, static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell) { + /* Ensure TX descriptor writes reach memory before NIC reads them. + * Prevents device from fetching stale descriptors. + */ + dma_wmb(); ionic_q_post(q, ring_dbell); } From de0337d641bfa5b6d6b489e479792f1039274e84 Mon Sep 17 00:00:00 2001 From: Mohammad Heib Date: Fri, 31 Oct 2025 17:52:03 +0200 Subject: [PATCH 0675/1075] net: ionic: map SKB after pseudo-header checksum prep The TSO path called ionic_tx_map_skb() before preparing the TCP pseudo checksum (ionic_tx_tcp_[inner_]pseudo_csum()), which may perform skb_cow_head() and might modifies bytes in the linear header area. Mapping first and then mutating the header risks: - Using a stale DMA address if skb_cow_head() relocates the head, and/or - Device reading stale header bytes on weakly-ordered systems (CPU writes after mapping are not guaranteed visible without an explicit dma_sync_single_for_device()). Reorder the TX path to perform all header mutations (including skb_cow_head()) *before* DMA mapping. Mapping is now done only after the skb layout and header contents are final. This removes the need for any post-mapping dma_sync and prevents on-wire corruption observed under VLAN+TSO load after repeated runs. This change is purely an ordering fix; no functional behavior change otherwise. Fixes: 0f3154e6bcb3 ("ionic: Add Tx and Rx handling") Signed-off-by: Mohammad Heib Reviewed-by: Brett Creeley Link: https://patch.msgid.link/20251031155203.203031-2-mheib@redhat.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/pensando/ionic/ionic_txrx.c | 30 ++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2e571d0a0d8a..301ebee2fdc5 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1448,19 +1448,6 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, bool encap; int err; - desc_info = &q->tx_info[q->head_idx]; - - if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) - return -EIO; - - len = skb->len; - mss = skb_shinfo(skb)->gso_size; - outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | - SKB_GSO_GRE_CSUM | - SKB_GSO_IPXIP4 | - SKB_GSO_IPXIP6 | - SKB_GSO_UDP_TUNNEL | - SKB_GSO_UDP_TUNNEL_CSUM)); has_vlan = !!skb_vlan_tag_present(skb); vlan_tci = skb_vlan_tag_get(skb); encap = skb->encapsulation; @@ -1474,12 +1461,21 @@ static int ionic_tx_tso(struct net_device *netdev, struct ionic_queue *q, err = ionic_tx_tcp_inner_pseudo_csum(skb); else err = ionic_tx_tcp_pseudo_csum(skb); - if (unlikely(err)) { - /* clean up mapping from ionic_tx_map_skb */ - ionic_tx_desc_unmap_bufs(q, desc_info); + if (unlikely(err)) return err; - } + desc_info = &q->tx_info[q->head_idx]; + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; + mss = skb_shinfo(skb)->gso_size; + outer_csum = (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | + SKB_GSO_IPXIP4 | + SKB_GSO_IPXIP6 | + SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM)); if (encap) hdrlen = skb_inner_tcp_all_headers(skb); else From 56b3c85e153b84f27e6cff39623ba40a1ad299d3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 27 Oct 2025 10:50:21 -0700 Subject: [PATCH 0676/1075] ftrace: Fix BPF fexit with livepatch When livepatch is attached to the same function as bpf trampoline with a fexit program, bpf trampoline code calls register_ftrace_direct() twice. The first time will fail with -EAGAIN, and the second time it will succeed. This requires register_ftrace_direct() to unregister the address on the first attempt. Otherwise, the bpf trampoline cannot attach. Here is an easy way to reproduce this issue: insmod samples/livepatch/livepatch-sample.ko bpftrace -e 'fexit:cmdline_proc_show {}' ERROR: Unable to attach probe: fexit:vmlinux:cmdline_proc_show... Fix this by cleaning up the hash when register_ftrace_function_nolock hits errors. Also, move the code that resets ops->func and ops->trampoline to the error path of register_ftrace_direct(); and add a helper function reset_direct() in register_ftrace_direct() and unregister_ftrace_direct(). Fixes: d05cb470663a ("ftrace: Fix modification of direct_function hash while in use") Cc: stable@vger.kernel.org # v6.6+ Reported-by: Andrey Grodzovsky Closes: https://lore.kernel.org/live-patching/c5058315a39d4615b333e485893345be@crowdstrike.com/ Cc: Steven Rostedt (Google) Cc: Masami Hiramatsu (Google) Acked-and-tested-by: Andrey Grodzovsky Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20251027175023.1521602-2-song@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- kernel/bpf/trampoline.c | 5 ----- kernel/trace/ftrace.c | 20 ++++++++++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5949095e51c3..f2cb0b097093 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -479,11 +479,6 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr, bool lock_direct_mut * BPF_TRAMP_F_SHARE_IPMODIFY is set, we can generate the * trampoline again, and retry register. */ - /* reset fops->func and fops->trampoline for re-register */ - tr->fops->func = NULL; - tr->fops->trampoline = 0; - - /* free im memory and reallocate later */ bpf_tramp_image_free(im); goto again; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 42bd2ba68a82..cbeb7e833131 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5953,6 +5953,17 @@ static void register_ftrace_direct_cb(struct rcu_head *rhp) free_ftrace_hash(fhp); } +static void reset_direct(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + + remove_direct_functions_hash(hash, addr); + + /* cleanup for possible another register call */ + ops->func = NULL; + ops->trampoline = 0; +} + /** * register_ftrace_direct - Call a custom trampoline directly * for multiple functions registered in @ops @@ -6048,6 +6059,8 @@ int register_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) ops->direct_call = addr; err = register_ftrace_function_nolock(ops); + if (err) + reset_direct(ops, addr); out_unlock: mutex_unlock(&direct_mutex); @@ -6080,7 +6093,6 @@ EXPORT_SYMBOL_GPL(register_ftrace_direct); int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, bool free_filters) { - struct ftrace_hash *hash = ops->func_hash->filter_hash; int err; if (check_direct_multi(ops)) @@ -6090,13 +6102,9 @@ int unregister_ftrace_direct(struct ftrace_ops *ops, unsigned long addr, mutex_lock(&direct_mutex); err = unregister_ftrace_function(ops); - remove_direct_functions_hash(hash, addr); + reset_direct(ops, addr); mutex_unlock(&direct_mutex); - /* cleanup for possible another register call */ - ops->func = NULL; - ops->trampoline = 0; - if (free_filters) ftrace_free_filter(ops); return err; From 3e9a18e1c3e931abecf501cbb23d28d69f85bb56 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 27 Oct 2025 10:50:22 -0700 Subject: [PATCH 0677/1075] ftrace: bpf: Fix IPMODIFY + DIRECT in modify_ftrace_direct() ftrace_hash_ipmodify_enable() checks IPMODIFY and DIRECT ftrace_ops on the same kernel function. When needed, ftrace_hash_ipmodify_enable() calls ops->ops_func() to prepare the direct ftrace (BPF trampoline) to share the same function as the IPMODIFY ftrace (livepatch). ftrace_hash_ipmodify_enable() is called in register_ftrace_direct() path, but not called in modify_ftrace_direct() path. As a result, the following operations will break livepatch: 1. Load livepatch to a kernel function; 2. Attach fentry program to the kernel function; 3. Attach fexit program to the kernel function. After 3, the kernel function being used will not be the livepatched version, but the original version. Fix this by adding __ftrace_hash_update_ipmodify() to __modify_ftrace_direct() and adjust some logic around the call. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20251027175023.1521602-3-song@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cbeb7e833131..59cfacb8a5bb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1971,7 +1971,8 @@ static void ftrace_hash_rec_enable_modify(struct ftrace_ops *ops) */ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, struct ftrace_hash *old_hash, - struct ftrace_hash *new_hash) + struct ftrace_hash *new_hash, + bool update_target) { struct ftrace_page *pg; struct dyn_ftrace *rec, *end = NULL; @@ -2006,10 +2007,13 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, if (rec->flags & FTRACE_FL_DISABLED) continue; - /* We need to update only differences of filter_hash */ + /* + * Unless we are updating the target of a direct function, + * we only need to update differences of filter_hash + */ in_old = !!ftrace_lookup_ip(old_hash, rec->ip); in_new = !!ftrace_lookup_ip(new_hash, rec->ip); - if (in_old == in_new) + if (!update_target && (in_old == in_new)) continue; if (in_new) { @@ -2020,7 +2024,16 @@ static int __ftrace_hash_update_ipmodify(struct ftrace_ops *ops, if (is_ipmodify) goto rollback; - FTRACE_WARN_ON(rec->flags & FTRACE_FL_DIRECT); + /* + * If this is called by __modify_ftrace_direct() + * then it is only changing where the direct + * pointer is jumping to, and the record already + * points to a direct trampoline. If it isn't, + * then it is a bug to update ipmodify on a direct + * caller. + */ + FTRACE_WARN_ON(!update_target && + (rec->flags & FTRACE_FL_DIRECT)); /* * Another ops with IPMODIFY is already @@ -2076,7 +2089,7 @@ static int ftrace_hash_ipmodify_enable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash); + return __ftrace_hash_update_ipmodify(ops, EMPTY_HASH, hash, false); } /* Disabling always succeeds */ @@ -2087,7 +2100,7 @@ static void ftrace_hash_ipmodify_disable(struct ftrace_ops *ops) if (ftrace_hash_empty(hash)) hash = NULL; - __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH); + __ftrace_hash_update_ipmodify(ops, hash, EMPTY_HASH, false); } static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, @@ -2101,7 +2114,7 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops, if (ftrace_hash_empty(new_hash)) new_hash = NULL; - return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash); + return __ftrace_hash_update_ipmodify(ops, old_hash, new_hash, false); } static void print_ip_ins(const char *fmt, const unsigned char *p) @@ -6114,7 +6127,7 @@ EXPORT_SYMBOL_GPL(unregister_ftrace_direct); static int __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) { - struct ftrace_hash *hash; + struct ftrace_hash *hash = ops->func_hash->filter_hash; struct ftrace_func_entry *entry, *iter; static struct ftrace_ops tmp_ops = { .func = ftrace_stub, @@ -6134,13 +6147,21 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) if (err) return err; + /* + * Call __ftrace_hash_update_ipmodify() here, so that we can call + * ops->ops_func for the ops. This is needed because the above + * register_ftrace_function_nolock() worked on tmp_ops. + */ + err = __ftrace_hash_update_ipmodify(ops, hash, hash, true); + if (err) + goto out; + /* * Now the ftrace_ops_list_func() is called to do the direct callers. * We can safely change the direct functions attached to each entry. */ mutex_lock(&ftrace_lock); - hash = ops->func_hash->filter_hash; size = 1 << hash->size_bits; for (i = 0; i < size; i++) { hlist_for_each_entry(iter, &hash->buckets[i], hlist) { @@ -6155,6 +6176,7 @@ __modify_ftrace_direct(struct ftrace_ops *ops, unsigned long addr) mutex_unlock(&ftrace_lock); +out: /* Removing the tmp_ops will add the updated direct callers to the functions */ unregister_ftrace_function(&tmp_ops); From 62d2d0a33839c28173909616db2ef16e1a4a5071 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 27 Oct 2025 10:50:23 -0700 Subject: [PATCH 0678/1075] selftests/bpf: Add tests for livepatch + bpf trampoline Both livepatch and BPF trampoline use ftrace. Special attention is needed when livepatch and fexit program touch the same function at the same time, because livepatch updates a kernel function and the BPF trampoline need to call into the right version of the kernel function. Use samples/livepatch/livepatch-sample.ko for the test. The test covers two cases: 1) When a fentry program is loaded first. This exercises the modify_ftrace_direct code path. 2) When a fentry program is loaded first. This exercises the register_ftrace_direct code path. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20251027175023.1521602-4-song@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- tools/testing/selftests/bpf/config | 3 + .../bpf/prog_tests/livepatch_trampoline.c | 107 ++++++++++++++++++ .../bpf/progs/livepatch_trampoline.c | 30 +++++ 3 files changed, 140 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c create mode 100644 tools/testing/selftests/bpf/progs/livepatch_trampoline.c diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653e..f2a2fd236ca8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y CONFIG_KEYS=y CONFIG_LIRC=y +CONFIG_LIVEPATCH=y CONFIG_LWTUNNEL=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SRCVERSION_ALL=y @@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_PACKET=y CONFIG_RC_CORE=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_LIVEPATCH=m CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c new file mode 100644 index 000000000000..72aa5376c30e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include "testing_helpers.h" +#include "livepatch_trampoline.skel.h" + +static int load_livepatch(void) +{ + char path[4096]; + + /* CI will set KBUILD_OUTPUT */ + snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko", + getenv("KBUILD_OUTPUT") ? : "../../../.."); + + return load_module(path, env_verbosity > VERBOSE_NONE); +} + +static void unload_livepatch(void) +{ + /* Disable the livepatch before unloading the module */ + system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled"); + + unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE); +} + +static void read_proc_cmdline(void) +{ + char buf[4096]; + int fd, ret; + + fd = open("/proc/cmdline", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open /proc/cmdline")) + return; + + ret = read(fd, buf, sizeof(buf)); + if (!ASSERT_GT(ret, 0, "read /proc/cmdline")) + goto out; + + ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp"); + +out: + close(fd); +} + +static void __test_livepatch_trampoline(bool fexit_first) +{ + struct livepatch_trampoline *skel = NULL; + int err; + + skel = livepatch_trampoline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + skel->bss->my_pid = getpid(); + + if (!fexit_first) { + /* fentry program is loaded first by default */ + err = livepatch_trampoline__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + } else { + /* Manually load fexit program first. */ + skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline); + if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit")) + goto out; + + skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline); + if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry")) + goto out; + } + + read_proc_cmdline(); + + ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit"); + ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit"); +out: + livepatch_trampoline__destroy(skel); +} + +void test_livepatch_trampoline(void) +{ + int retry_cnt = 0; + +retry: + if (load_livepatch()) { + if (retry_cnt) { + ASSERT_OK(1, "load_livepatch"); + goto out; + } + /* + * Something else (previous run of the same test?) loaded + * the KLP module. Unload the KLP module and retry. + */ + unload_livepatch(); + retry_cnt++; + goto retry; + } + + if (test__start_subtest("fentry_first")) + __test_livepatch_trampoline(false); + + if (test__start_subtest("fexit_first")) + __test_livepatch_trampoline(true); +out: + unload_livepatch(); +} diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c new file mode 100644 index 000000000000..15579d5bcd91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +int fentry_hit; +int fexit_hit; +int my_pid; + +SEC("fentry/cmdline_proc_show") +int BPF_PROG(fentry_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fentry_hit = 1; + return 0; +} + +SEC("fexit/cmdline_proc_show") +int BPF_PROG(fexit_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fexit_hit = 1; + return 0; +} From 2e25935ed24daee37c4c2e8e29e478ce6e1f72c7 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 1 Nov 2025 16:26:42 +0300 Subject: [PATCH 0679/1075] octeontx2-pf: Fix devm_kcalloc() error checking The devm_kcalloc() function never return error pointers, it returns NULL on failure. Also delete the netdev_err() printk. These allocation functions already have debug output built-in some the extra error message is not required. Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aQYKkrGA12REb2sj@stanley.mountain Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index aff17c37ddde..902d6abaa3ec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1516,10 +1516,8 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->xdp_cnt = numptrs; pool->xdp = devm_kcalloc(pfvf->dev, numptrs, sizeof(struct xdp_buff *), GFP_KERNEL); - if (IS_ERR(pool->xdp)) { - netdev_err(pfvf->netdev, "Creation of xsk pool failed\n"); - return PTR_ERR(pool->xdp); - } + if (!pool->xdp) + return -ENOMEM; } return 0; From 5556f23478e6eb5d6a0321d4135e2c37a3c78a1e Mon Sep 17 00:00:00 2001 From: Vivian Wang Date: Mon, 3 Nov 2025 10:02:49 +0800 Subject: [PATCH 0680/1075] net: spacemit: Check netif_running() in emac_set_pauseparam() Currently, emac_set_pauseparam() will oops if userspace calls it while the interface is not up, because phydev is NULL, but it is still accessed in emac_set_fc() and emac_set_fc_autoneg(). Check for netif_running(dev) in emac_set_pauseparam() before proceeding. Fixes: bfec6d7f2001 ("net: spacemit: Add K1 Ethernet MAC") Signed-off-by: Vivian Wang Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251103-k1-ethernet-remove-fc-v3-1-2083770cd282@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/spacemit/k1_emac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c index e1c5faff3b71..220eb5ce7583 100644 --- a/drivers/net/ethernet/spacemit/k1_emac.c +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -1441,6 +1441,9 @@ static int emac_set_pauseparam(struct net_device *dev, struct emac_priv *priv = netdev_priv(dev); u8 fc = 0; + if (!netif_running(dev)) + return -ENETDOWN; + priv->flow_control_autoneg = pause->autoneg; if (pause->autoneg) { From 59b20b15c112867f28a12a24aa25f14549db02e4 Mon Sep 17 00:00:00 2001 From: Huiwen He Date: Mon, 3 Nov 2025 10:36:19 +0800 Subject: [PATCH 0681/1075] sctp: make sctp_transport_init() void sctp_transport_init() is static and never returns NULL. It is only called by sctp_transport_new(), so change it to void and remove the redundant return value check. Signed-off-by: Huiwen He Acked-by: Xin Long Link: https://patch.msgid.link/20251103023619.1025622-1-hehuiwen@kylinos.cn Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 4d258a6e8033..0d48c61fe6ad 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -37,10 +37,10 @@ /* 1st Level Abstractions. */ /* Initialize a new transport from provided memory. */ -static struct sctp_transport *sctp_transport_init(struct net *net, - struct sctp_transport *peer, - const union sctp_addr *addr, - gfp_t gfp) +static void sctp_transport_init(struct net *net, + struct sctp_transport *peer, + const union sctp_addr *addr, + gfp_t gfp) { /* Copy in the address. */ peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); @@ -83,8 +83,6 @@ static struct sctp_transport *sctp_transport_init(struct net *net, get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); refcount_set(&peer->refcnt, 1); - - return peer; } /* Allocate and initialize a new transport. */ @@ -96,20 +94,13 @@ struct sctp_transport *sctp_transport_new(struct net *net, transport = kzalloc(sizeof(*transport), gfp); if (!transport) - goto fail; + return NULL; - if (!sctp_transport_init(net, transport, addr, gfp)) - goto fail_init; + sctp_transport_init(net, transport, addr, gfp); SCTP_DBG_OBJCNT_INC(transport); return transport; - -fail_init: - kfree(transport); - -fail: - return NULL; } /* This transport is no longer needed. Free up if possible, or From e120f46768d98151ece8756ebd688b0e43dc8b29 Mon Sep 17 00:00:00 2001 From: Qendrim Maxhuni Date: Wed, 29 Oct 2025 08:57:44 +0100 Subject: [PATCH 0682/1075] net: usb: qmi_wwan: initialize MAC header offset in qmimux_rx_fixup Raw IP packets have no MAC header, leaving skb->mac_header uninitialized. This can trigger kernel panics on ARM64 when xfrm or other subsystems access the offset due to strict alignment checks. Initialize the MAC header to prevent such crashes. This can trigger kernel panics on ARM when running IPsec over the qmimux0 interface. Example trace: Internal error: Oops: 000000009600004f [#1] SMP CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.12.34-gbe78e49cb433 #1 Hardware name: LS1028A RDB Board (DT) pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : xfrm_input+0xde8/0x1318 lr : xfrm_input+0x61c/0x1318 sp : ffff800080003b20 Call trace: xfrm_input+0xde8/0x1318 xfrm6_rcv+0x38/0x44 xfrm6_esp_rcv+0x48/0xa8 ip6_protocol_deliver_rcu+0x94/0x4b0 ip6_input_finish+0x44/0x70 ip6_input+0x44/0xc0 ipv6_rcv+0x6c/0x114 __netif_receive_skb_one_core+0x5c/0x8c __netif_receive_skb+0x18/0x60 process_backlog+0x78/0x17c __napi_poll+0x38/0x180 net_rx_action+0x168/0x2f0 Fixes: c6adf77953bc ("net: usb: qmi_wwan: add qmap mux protocol support") Signed-off-by: Qendrim Maxhuni Link: https://patch.msgid.link/20251029075744.105113-1-qendrim.maxhuni@garderos.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/qmi_wwan.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 11352d85475a..3a4985b582cb 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -192,6 +192,12 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (!skbn) return 0; + /* Raw IP packets don't have a MAC header, but other subsystems + * (like xfrm) may still access MAC header offsets, so they must + * be initialized. + */ + skb_reset_mac_header(skbn); + switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); From b98b69c38512c3a8277c83b2d07674fd1ff59625 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 2 Nov 2025 21:10:15 +0200 Subject: [PATCH 0683/1075] ALSA: usb-audio: add min_mute quirk for SteelSeries Arctis ID 1038:1294 SteelSeries ApS Arctis Pro Wireless is reported to have muted min playback volume. Apply quirk for that. Link: https://gitlab.freedesktop.org/pipewire/pipewire/-/issues/4229#note_3174448 Signed-off-by: Pauli Virtanen Link: https://patch.msgid.link/a83f2694b1f8c37e4667a3cf057ffdc408b0f70d.1762108507.git.pav@iki.fi Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 71638e6dfb20..e5b857129caf 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2267,6 +2267,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0fd9, 0x0008, /* Hauppauge HVR-950Q */ QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), + DEVICE_FLG(0x1038, 0x1294, /* SteelSeries Arctis Pro Wireless */ + QUIRK_FLAG_MIXER_PLAYBACK_MIN_MUTE), DEVICE_FLG(0x1101, 0x0003, /* Audioengine D1 */ QUIRK_FLAG_GET_SAMPLE_RATE), DEVICE_FLG(0x12d1, 0x3a07, /* Huawei Technologies Co., Ltd. */ From fd9f30d1038ee1624baa17a6ff11effe5f7617cb Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 3 Nov 2025 22:38:26 +0100 Subject: [PATCH 0684/1075] parisc: Avoid crash due to unaligned access in unwinder Guenter Roeck reported this kernel crash on his emulated B160L machine: Starting network: udhcpc: started, v1.36.1 Backtrace: [<104320d4>] unwind_once+0x1c/0x5c [<10434a00>] walk_stackframe.isra.0+0x74/0xb8 [<10434a6c>] arch_stack_walk+0x28/0x38 [<104e5efc>] stack_trace_save+0x48/0x5c [<105d1bdc>] set_track_prepare+0x44/0x6c [<105d9c80>] ___slab_alloc+0xfc4/0x1024 [<105d9d38>] __slab_alloc.isra.0+0x58/0x90 [<105dc80c>] kmem_cache_alloc_noprof+0x2ac/0x4a0 [<105b8e54>] __anon_vma_prepare+0x60/0x280 [<105a823c>] __vmf_anon_prepare+0x68/0x94 [<105a8b34>] do_wp_page+0x8cc/0xf10 [<105aad88>] handle_mm_fault+0x6c0/0xf08 [<10425568>] do_page_fault+0x110/0x440 [<10427938>] handle_interruption+0x184/0x748 [<11178398>] schedule+0x4c/0x190 BUG: spinlock recursion on CPU#0, ifconfig/2420 lock: terminate_lock.2+0x0/0x1c, .magic: dead4ead, .owner: ifconfig/2420, .owner_cpu: 0 While creating the stack trace, the unwinder uses the stack pointer to guess the previous frame to read the previous stack pointer from memory. The crash happens, because the unwinder tries to read from unaligned memory and as such triggers the unalignment trap handler which then leads to the spinlock recursion and finally to a deadlock. Fix it by checking the alignment before accessing the memory. Reported-by: Guenter Roeck Signed-off-by: Helge Deller Tested-by: Guenter Roeck Cc: stable@vger.kernel.org # v6.12+ --- arch/parisc/kernel/unwind.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index f7e0fee5ee55..7ac88ff13d3c 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -35,6 +35,8 @@ #define KERNEL_START (KERNEL_BINARY_TEXT_START) +#define ALIGNMENT_OK(ptr, type) (((ptr) & (sizeof(type) - 1)) == 0) + extern struct unwind_table_entry __start___unwind[]; extern struct unwind_table_entry __stop___unwind[]; @@ -257,12 +259,15 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int if (pc_is_kernel_fn(pc, _switch_to) || pc == (unsigned long)&_switch_to_ret) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; - info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + if (ALIGNMENT_OK(info->prev_sp, long)) + info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); + else + info->prev_ip = info->prev_sp = 0; return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long)&_call_on_stack) { + if (pc == (unsigned long)&_call_on_stack && ALIGNMENT_OK(info->sp, long)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; @@ -370,8 +375,10 @@ static void unwind_frame_regs(struct unwind_frame_info *info) info->prev_sp = info->sp - frame_size; if (e->Millicode) info->rp = info->r31; - else if (rpoffset) + else if (rpoffset && ALIGNMENT_OK(info->prev_sp, long)) info->rp = *(unsigned long *)(info->prev_sp - rpoffset); + else + info->rp = 0; info->prev_ip = info->rp; info->rp = 0; } From 249d96b492efb7a773296ab2c62179918301c146 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Tue, 4 Nov 2025 13:49:14 +0200 Subject: [PATCH 0685/1075] ASoC: da7213: Use component driver suspend/resume Since snd_soc_suspend() is invoked through snd_soc_pm_ops->suspend(), and snd_soc_pm_ops is associated with the soc_driver (defined in sound/soc/soc-core.c), and there is no parent-child relationship between the soc_driver and the DA7213 codec driver, the power management subsystem does not enforce a specific suspend/resume order between the DA7213 driver and the soc_driver. Because of this, the different codec component functionalities, called from snd_soc_resume() to reconfigure various functions, can race with the DA7213 struct dev_pm_ops::resume function, leading to misapplied configuration. This occasionally results in clipped sound. Fix this by dropping the struct dev_pm_ops::{suspend, resume} and use instead struct snd_soc_component_driver::{suspend, resume}. This ensures the proper configuration sequence is handled by the ASoC subsystem. Cc: stable@vger.kernel.org Fixes: 431e040065c8 ("ASoC: da7213: Add suspend to RAM support") Signed-off-by: Claudiu Beznea Link: https://patch.msgid.link/20251104114914.2060603-1-claudiu.beznea.uj@bp.renesas.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7213.c | 69 +++++++++++++++++++++++++-------------- sound/soc/codecs/da7213.h | 1 + 2 files changed, 45 insertions(+), 25 deletions(-) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index ae89260ca215..3420011da444 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -2124,11 +2124,50 @@ static int da7213_probe(struct snd_soc_component *component) return 0; } +static int da7213_runtime_suspend(struct device *dev) +{ + struct da7213_priv *da7213 = dev_get_drvdata(dev); + + regcache_cache_only(da7213->regmap, true); + regcache_mark_dirty(da7213->regmap); + regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies); + + return 0; +} + +static int da7213_runtime_resume(struct device *dev) +{ + struct da7213_priv *da7213 = dev_get_drvdata(dev); + int ret; + + ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies); + if (ret < 0) + return ret; + regcache_cache_only(da7213->regmap, false); + return regcache_sync(da7213->regmap); +} + +static int da7213_suspend(struct snd_soc_component *component) +{ + struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component); + + return da7213_runtime_suspend(da7213->dev); +} + +static int da7213_resume(struct snd_soc_component *component) +{ + struct da7213_priv *da7213 = snd_soc_component_get_drvdata(component); + + return da7213_runtime_resume(da7213->dev); +} + static const struct snd_soc_component_driver soc_component_dev_da7213 = { .probe = da7213_probe, .set_bias_level = da7213_set_bias_level, .controls = da7213_snd_controls, .num_controls = ARRAY_SIZE(da7213_snd_controls), + .suspend = da7213_suspend, + .resume = da7213_resume, .dapm_widgets = da7213_dapm_widgets, .num_dapm_widgets = ARRAY_SIZE(da7213_dapm_widgets), .dapm_routes = da7213_audio_map, @@ -2175,6 +2214,8 @@ static int da7213_i2c_probe(struct i2c_client *i2c) if (!da7213->fin_min_rate) return -EINVAL; + da7213->dev = &i2c->dev; + i2c_set_clientdata(i2c, da7213); /* Get required supplies */ @@ -2224,31 +2265,9 @@ static void da7213_i2c_remove(struct i2c_client *i2c) pm_runtime_disable(&i2c->dev); } -static int da7213_runtime_suspend(struct device *dev) -{ - struct da7213_priv *da7213 = dev_get_drvdata(dev); - - regcache_cache_only(da7213->regmap, true); - regcache_mark_dirty(da7213->regmap); - regulator_bulk_disable(DA7213_NUM_SUPPLIES, da7213->supplies); - - return 0; -} - -static int da7213_runtime_resume(struct device *dev) -{ - struct da7213_priv *da7213 = dev_get_drvdata(dev); - int ret; - - ret = regulator_bulk_enable(DA7213_NUM_SUPPLIES, da7213->supplies); - if (ret < 0) - return ret; - regcache_cache_only(da7213->regmap, false); - return regcache_sync(da7213->regmap); -} - -static DEFINE_RUNTIME_DEV_PM_OPS(da7213_pm, da7213_runtime_suspend, - da7213_runtime_resume, NULL); +static const struct dev_pm_ops da7213_pm = { + RUNTIME_PM_OPS(da7213_runtime_suspend, da7213_runtime_resume, NULL) +}; static const struct i2c_device_id da7213_i2c_id[] = { { "da7213" }, diff --git a/sound/soc/codecs/da7213.h b/sound/soc/codecs/da7213.h index b9ab791d6b88..29cbf0eb6124 100644 --- a/sound/soc/codecs/da7213.h +++ b/sound/soc/codecs/da7213.h @@ -595,6 +595,7 @@ enum da7213_supplies { /* Codec private data */ struct da7213_priv { struct regmap *regmap; + struct device *dev; struct mutex ctrl_lock; struct regulator_bulk_data supplies[DA7213_NUM_SUPPLIES]; struct clk *mclk; From d24822e1471d68a6eceee67809b5956d59482cb5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 27 Oct 2025 15:24:17 +0900 Subject: [PATCH 0686/1075] ksmbd: detect RDMA capable lower devices when bridge and vlan netdev is used If user set bridge interface as actual RDMA-capable NICs are lower devices, ksmbd can not detect as RDMA capable. This patch can detect the RDMA capable lower devices from bridge master or VLAN. With this change, ksmbd can accept both TCP and RDMA connections through the same bridge IP address, allowing mixed transport operation without requiring separate interfaces. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 7d86553fcc7c..4a8aeb1df0cc 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2606,7 +2606,7 @@ void ksmbd_rdma_destroy(void) } } -bool ksmbd_rdma_capable_netdev(struct net_device *netdev) +static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev) { struct smb_direct_device *smb_dev; int i; @@ -2648,6 +2648,24 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev) return rdma_capable; } +bool ksmbd_rdma_capable_netdev(struct net_device *netdev) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (ksmbd_find_rdma_capable_netdev(netdev)) + return true; + + /* check if netdev is bridge or VLAN */ + if (netif_is_bridge_master(netdev) || + netdev->priv_flags & IFF_802_1Q_VLAN) + netdev_for_each_lower_dev(netdev, lower_dev, iter) + if (ksmbd_find_rdma_capable_netdev(lower_dev)) + return true; + + return false; +} + static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = { .prepare = smb_direct_prepare, .disconnect = smb_direct_disconnect, From e6187655acfa2dd566ea2aed4522083f0bb940c3 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Mon, 27 Oct 2025 15:46:39 +0900 Subject: [PATCH 0687/1075] ksmbd: detect RDMA capable netdevs include IPoIB Current ksmbd_rdma_capable_netdev fails to mark certain RDMA-capable inerfaces such as IPoIB as RDMA capable after reverting GUID matching code due to layer violation. This patch check the ARPHRD_INFINIBAND type safely identifies an IPoIB interface without introducing a layer violation, ensuring RDMA functionality is correctly enabled for these interfaces. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 4a8aeb1df0cc..5d3b48e77012 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2663,6 +2663,10 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev) if (ksmbd_find_rdma_capable_netdev(lower_dev)) return true; + /* check if netdev is IPoIB safely without layer violation */ + if (netdev->type == ARPHRD_INFINIBAND) + return true; + return false; } From 734e99623c5b65bf2c03e35978a0b980ebc3c2f8 Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Mon, 3 Nov 2025 19:52:55 -0300 Subject: [PATCH 0688/1075] smb: client: fix potential UAF in smb2_close_cached_fid() find_or_create_cached_dir() could grab a new reference after kref_put() had seen the refcount drop to zero but before cfid_list_lock is acquired in smb2_close_cached_fid(), leading to use-after-free. Switch to kref_put_lock() so cfid_release() is called with cfid_list_lock held, closing that gap. Fixes: ebe98f1447bb ("cifs: enable caching of directories for which a lease is held") Cc: stable@vger.kernel.org Reported-by: Jay Shin Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/cached_dir.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index b8ac7b7faf61..018055fd2cdb 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -388,11 +388,11 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, * lease. Release one here, and the second below. */ cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfids->cfid_list_lock); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } else { *ret_cfid = cfid; atomic_inc(&tcon->num_remote_opens); @@ -438,12 +438,14 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, static void smb2_close_cached_fid(struct kref *ref) +__releases(&cfid->cfids->cfid_list_lock) { struct cached_fid *cfid = container_of(ref, struct cached_fid, refcount); int rc; - spin_lock(&cfid->cfids->cfid_list_lock); + lockdep_assert_held(&cfid->cfids->cfid_list_lock); + if (cfid->on_list) { list_del(&cfid->entry); cfid->on_list = false; @@ -478,7 +480,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, spin_lock(&cfid->cfids->cfid_list_lock); if (cfid->has_lease) { cfid->has_lease = false; - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } spin_unlock(&cfid->cfids->cfid_list_lock); close_cached_dir(cfid); @@ -487,7 +489,7 @@ void drop_cached_dir_by_name(const unsigned int xid, struct cifs_tcon *tcon, void close_cached_dir(struct cached_fid *cfid) { - kref_put(&cfid->refcount, smb2_close_cached_fid); + kref_put_lock(&cfid->refcount, smb2_close_cached_fid, &cfid->cfids->cfid_list_lock); } /* @@ -596,7 +598,7 @@ cached_dir_offload_close(struct work_struct *work) WARN_ON(cfid->on_list); - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); } @@ -762,7 +764,7 @@ static void cfids_laundromat_worker(struct work_struct *work) * Drop the ref-count from above, either the lease-ref (if there * was one) or the extra one acquired. */ - kref_put(&cfid->refcount, smb2_close_cached_fid); + close_cached_dir(cfid); } queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); From 8a7348a9ed70bda1c1f51d3f1815bcbdf9f3b38c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 8 Oct 2025 09:52:25 -0400 Subject: [PATCH 0689/1075] nfsd: fix refcount leak in nfsd_set_fh_dentry() nfsd exports a "pseudo root filesystem" which is used by NFSv4 to find the various exported filesystems using LOOKUP requests from a known root filehandle. NFSv3 uses the MOUNT protocol to find those exported filesystems and so is not given access to the pseudo root filesystem. If a v3 (or v2) client uses a filehandle from that filesystem, nfsd_set_fh_dentry() will report an error, but still stores the export in "struct svc_fh" even though it also drops the reference (exp_put()). This means that when fh_put() is called an extra reference will be dropped which can lead to use-after-free and possible denial of service. Normal NFS usage will not provide a pseudo-root filehandle to a v3 client. This bug can only be triggered by the client synthesising an incorrect filehandle. To fix this we move the assignments to the svc_fh later, after all possible error cases have been detected. Reported-and-tested-by: tianshuo han Fixes: ef7f6c4904d0 ("nfsd: move V4ROOT version check to nfsd_set_fh_dentry()") Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 3edccc38db42..bd9acfdc7b01 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -269,9 +269,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, dentry); } - fhp->fh_dentry = dentry; - fhp->fh_export = exp; - switch (fhp->fh_maxsize) { case NFS4_FHSIZE: if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) @@ -293,6 +290,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, goto out; } + fhp->fh_dentry = dentry; + fhp->fh_export = exp; + return 0; out: exp_put(exp); From 4d3dbc2386fe051e44efad663e0ec828b98ab53f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 9 Oct 2025 16:37:59 -0400 Subject: [PATCH 0690/1075] nfsd: add missing FATTR4_WORD2_CLONE_BLKSIZE from supported attributes RFC 7862 Section 4.1.2 says that if the server supports CLONE it MUST support clone_blksize attribute. Fixes: d6ca7d2643ee ("NFSD: Implement FATTR4_CLONE_BLKSIZE attribute") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index f19320018639..b752433c3c2c 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -458,6 +458,7 @@ enum { #define NFSD4_2_SUPPORTED_ATTRS_WORD2 \ (NFSD4_1_SUPPORTED_ATTRS_WORD2 | \ FATTR4_WORD2_MODE_UMASK | \ + FATTR4_WORD2_CLONE_BLKSIZE | \ NFSD4_2_SECURITY_ATTRS | \ FATTR4_WORD2_XATTR_SUPPORT | \ FATTR4_WORD2_TIME_DELEG_ACCESS | \ From fccac54b0d3d0602f177bb79f203ae6fbea0e32a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 27 Oct 2025 13:55:15 +0100 Subject: [PATCH 0691/1075] pmdomain: samsung: Rework legacy splash-screen handover workaround Limit the workaround for the lack of the proper splash-screen handover handling to the legacy ARM 32bit systems and replace forcing a sync_state by explicite power domain shutdown. This approach lets compiler to optimize it out on newer ARM 64bit systems. Suggested-by: Ulf Hansson Fixes: 0745658aebbe ("pmdomain: samsung: Fix splash-screen handover by enforcing a sync_state") Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/pmdomain/samsung/exynos-pm-domains.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/pmdomain/samsung/exynos-pm-domains.c b/drivers/pmdomain/samsung/exynos-pm-domains.c index f53e1bd24798..5c3aa8983087 100644 --- a/drivers/pmdomain/samsung/exynos-pm-domains.c +++ b/drivers/pmdomain/samsung/exynos-pm-domains.c @@ -128,6 +128,15 @@ static int exynos_pd_probe(struct platform_device *pdev) pd->pd.power_on = exynos_pd_power_on; pd->local_pwr_cfg = pm_domain_cfg->local_pwr_cfg; + /* + * Some Samsung platforms with bootloaders turning on the splash-screen + * and handing it over to the kernel, requires the power-domains to be + * reset during boot. + */ + if (IS_ENABLED(CONFIG_ARM) && + of_device_is_compatible(np, "samsung,exynos4210-pd")) + exynos_pd_power_off(&pd->pd); + on = readl_relaxed(pd->base + 0x4) & pd->local_pwr_cfg; pm_genpd_init(&pd->pd, NULL, !on); @@ -146,15 +155,6 @@ static int exynos_pd_probe(struct platform_device *pdev) parent.np, child.np); } - /* - * Some Samsung platforms with bootloaders turning on the splash-screen - * and handing it over to the kernel, requires the power-domains to be - * reset during boot. As a temporary hack to manage this, let's enforce - * a sync_state. - */ - if (!ret) - of_genpd_sync_state(np); - pm_runtime_enable(dev); return ret; } From f1fdffe0afea02ba783acfe815b6a60e7180df40 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 4 Nov 2025 10:10:06 -0600 Subject: [PATCH 0692/1075] x86/CPU/AMD: Add missing terminator for zen5_rdseed_microcode Running x86_match_min_microcode_rev() on a Zen5 CPU trips up KASAN for an out of bounds access. Fixes: 607b9fb2ce248 ("x86/CPU/AMD: Add RDSEED fix for Zen5") Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251104161007.269885-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8e36964a7721..2ba9f2d42d8c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1038,6 +1038,7 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + {}, }; static void init_amd_zen5(struct cpuinfo_x86 *c) From bbde14682eba21d86f5f3d6fe2d371b1f97f1e61 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Tue, 28 Oct 2025 11:16:20 +0800 Subject: [PATCH 0693/1075] pmdomain: imx: Fix reference count leak in imx_gpc_remove of_get_child_by_name() returns a node pointer with refcount incremented, we should use of_node_put() on it when not needed anymore. Add the missing of_node_put() to avoid refcount leak. Fixes: 721cabf6c660 ("soc: imx: move PGC handling to a new GPC driver") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Signed-off-by: Ulf Hansson --- drivers/pmdomain/imx/gpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pmdomain/imx/gpc.c b/drivers/pmdomain/imx/gpc.c index 33991f3c6b55..a34b260274f7 100644 --- a/drivers/pmdomain/imx/gpc.c +++ b/drivers/pmdomain/imx/gpc.c @@ -536,6 +536,8 @@ static void imx_gpc_remove(struct platform_device *pdev) return; } } + + of_node_put(pgc_node); } static struct platform_driver imx_gpc_driver = { From 8819a49f9ff8953475ba09d978d66b50368c095b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 30 Oct 2025 11:58:01 -0700 Subject: [PATCH 0694/1075] KVM: x86: Unload "FPU" state on INIT if and only if its currently in-use Replace the hack added by commit f958bd2314d1 ("KVM: x86: Fix potential put_fpu() w/o load_fpu() on MPX platform") with a more robust approach of unloading+reloading guest FPU state based on whether or not the vCPU's FPU is currently in-use, i.e. currently loaded. This fixes a bug on hosts that support CET but not MPX, where kvm_arch_vcpu_ioctl_get_mpstate() neglects to load FPU state (it only checks for MPX support) and leads to KVM attempting to put FPU state due to kvm_apic_accept_events() triggering INIT emulation. E.g. on a host with CET but not MPX, syzkaller+KASAN generates: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000004: 0000 [#1] SMP KASAN NOPTI KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] CPU: 211 UID: 0 PID: 20451 Comm: syz.9.26 Tainted: G S 6.18.0-smp-DEV #7 NONE Tainted: [S]=CPU_OUT_OF_SPEC Hardware name: Google Izumi/izumi, BIOS 0.20250729.1-0 07/29/2025 RIP: 0010:fpu_swap_kvm_fpstate+0x3ce/0x610 ../arch/x86/kernel/fpu/core.c:377 RSP: 0018:ff1100410c167cc0 EFLAGS: 00010202 RAX: 0000000000000004 RBX: 0000000000000020 RCX: 00000000000001aa RDX: 00000000000001ab RSI: ffffffff817bb960 RDI: 0000000022600000 RBP: dffffc0000000000 R08: ff110040d23c8007 R09: 1fe220081a479000 R10: dffffc0000000000 R11: ffe21c081a479001 R12: ff110040d23c8d98 R13: 00000000fffdc578 R14: 0000000000000000 R15: ff110040d23c8d90 FS: 00007f86dd1876c0(0000) GS:ff11007fc969b000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f86dd186fa8 CR3: 00000040d1dfa003 CR4: 0000000000f73ef0 PKRU: 80000000 Call Trace: kvm_vcpu_reset+0x80d/0x12c0 ../arch/x86/kvm/x86.c:11818 kvm_apic_accept_events+0x1cb/0x500 ../arch/x86/kvm/lapic.c:3489 kvm_arch_vcpu_ioctl_get_mpstate+0xd0/0x4e0 ../arch/x86/kvm/x86.c:12145 kvm_vcpu_ioctl+0x5e2/0xed0 ../virt/kvm/kvm_main.c:4539 __se_sys_ioctl+0x11d/0x1b0 ../fs/ioctl.c:51 do_syscall_x64 ../arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0x6e/0x940 ../arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7f86de71d9c9 with a very simple reproducer: r0 = openat$kvm(0xffffffffffffff9c, &(0x7f0000000000), 0x80b00, 0x0) r1 = ioctl$KVM_CREATE_VM(r0, 0xae01, 0x0) ioctl$KVM_CREATE_IRQCHIP(r1, 0xae60) r2 = ioctl$KVM_CREATE_VCPU(r1, 0xae41, 0x0) ioctl$KVM_SET_IRQCHIP(r1, 0x8208ae63, ...) ioctl$KVM_GET_MP_STATE(r2, 0x8004ae98, &(0x7f00000000c0)) Alternatively, the MPX hack in GET_MP_STATE could be extended to cover CET, but from a "don't break existing functionality" perspective, that isn't any less risky than peeking at the state of in_use, and it's far less robust for a long term solution (as evidenced by this bug). Reported-by: Alexander Potapenko Fixes: 69cc3e886582 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER") Reviewed-by: Yao Yuan Reviewed-by: Chao Gao Link: https://patch.msgid.link/20251030185802.3375059-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b4b5d2d09634..d1e048d14e88 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12137,9 +12137,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, int r; vcpu_load(vcpu); - if (kvm_mpx_supported()) - kvm_load_guest_fpu(vcpu); - kvm_vcpu_srcu_read_lock(vcpu); r = kvm_apic_accept_events(vcpu); @@ -12156,9 +12153,6 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, out: kvm_vcpu_srcu_read_unlock(vcpu); - - if (kvm_mpx_supported()) - kvm_put_guest_fpu(vcpu); vcpu_put(vcpu); return r; } @@ -12788,6 +12782,7 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) { struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; u64 xfeatures_mask; + bool fpu_in_use; int i; /* @@ -12811,13 +12806,23 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event) BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); /* - * All paths that lead to INIT are required to load the guest's FPU - * state (because most paths are buried in KVM_RUN). + * Unload guest FPU state (if necessary) before zeroing XSTATE fields + * as the kernel can only modify the state when its resident in memory, + * i.e. when it's not loaded into hardware. + * + * WARN if the vCPU's desire to run, i.e. whether or not its in KVM_RUN, + * doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the + * only path that can trigger INIT emulation _and_ loads FPU state, and + * KVM_RUN should _always_ load FPU state. */ - kvm_put_guest_fpu(vcpu); + WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); + fpu_in_use = fpstate->in_use; + if (fpu_in_use) + kvm_put_guest_fpu(vcpu); for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) fpstate_clear_xstate_component(fpstate, i); - kvm_load_guest_fpu(vcpu); + if (fpu_in_use) + kvm_load_guest_fpu(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) From 9bc610b6a2a71d1a6acac27e82a0bc8ca861c7ac Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 30 Oct 2025 11:58:02 -0700 Subject: [PATCH 0695/1075] KVM: x86: Harden KVM against imbalanced load/put of guest FPU state Assert, via KVM_BUG_ON(), that guest FPU state isn't/is in use when loading/putting the FPU to help detect KVM bugs without needing an assist from KASAN. If an imbalanced load/put is detected, skip the redundant load/put to avoid clobbering guest state and/or crashing the host. Note, kvm_access_xstate_msr() already provides a similar assertion. Reviewed-by: Yao Yuan Reviewed-by: Chao Gao Link: https://patch.msgid.link/20251030185802.3375059-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d1e048d14e88..67e5f735adf2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11807,6 +11807,9 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) /* Swap (qemu) user FPU context for the guest FPU context. */ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); trace_kvm_fpu(1); @@ -11815,6 +11818,9 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) /* When vcpu_run ends, restore user space FPU context. */ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) + return; + fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); ++vcpu->stat.fpu_reload; trace_kvm_fpu(0); From cab4098be41826a91b55cdc851196d73d7057f9c Mon Sep 17 00:00:00 2001 From: Chao Gao Date: Mon, 27 Oct 2025 23:01:41 -0700 Subject: [PATCH 0696/1075] KVM: x86: Call out MSR_IA32_S_CET is not handled by XSAVES MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the comment above is_xstate_managed_msr() to note that MSR_IA32_S_CET isn't saved/restored by XSAVES/XRSTORS. MSR_IA32_S_CET isn't part of CET_U/S state as the SDM states: The register state used by Control-Flow Enforcement Technology (CET) comprises the two 64-bit MSRs (IA32_U_CET and IA32_PL3_SSP) that manage CET when CPL = 3 (CET_U state); and the three 64-bit MSRs (IA32_PL0_SSP–IA32_PL2_SSP) that manage CET when CPL < 3 (CET_S state). Opportunistically shift the snippet about the safety of loading certain MSRs to the function comment for kvm_access_xstate_msr(), which is where the MSRs are actually loaded into hardware. Fixes: e44eb58334bb ("KVM: x86: Load guest FPU state when access XSAVE-managed MSRs") Signed-off-by: Chao Gao Link: https://patch.msgid.link/20251028060142.29830-1-chao.gao@intel.com [sean: shift snippet about safety to kvm_access_xstate_msr()] Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 67e5f735adf2..c9c2aa6f4705 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3874,15 +3874,9 @@ static void record_steal_time(struct kvm_vcpu *vcpu) /* * Returns true if the MSR in question is managed via XSTATE, i.e. is context - * switched with the rest of guest FPU state. Note! S_CET is _not_ context - * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. - * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, - * the value saved/restored via XSTATE is always the host's value. That detail - * is _extremely_ important, as the guest's S_CET must _never_ be resident in - * hardware while executing in the host. Loading guest values for U_CET and - * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to - * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower - * privilege levels, i.e. are effectively only consumed by userspace as well. + * switched with the rest of guest FPU state. + * + * Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS. */ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) { @@ -3905,6 +3899,11 @@ static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) * MSR that is managed via XSTATE. Note, the caller is responsible for doing * the initial FPU load, this helper only ensures that guest state is resident * in hardware (the kernel can load its FPU state in IRQ context). + * + * Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the + * kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only + * consumed when transitioning to lower privilege levels, i.e. are effectively + * only consumed by userspace as well. */ static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info, From 59a217ced3e7af849cc84fce36d8bfe225976e27 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 16 Oct 2025 12:06:41 -0700 Subject: [PATCH 0697/1075] KVM: SVM: Initialize per-CPU svm_data at the end of hardware setup Setup the per-CPU SVM data structures at the very end of hardware setup so that svm_hardware_unsetup() can be used in svm_hardware_setup() to unwind AVIC setup (for the GALog notifier). Alternatively, the error path could do an explicit, manual unwind, e.g. by adding a helper to free the per-CPU structures. But the per-CPU allocations have no interactions or dependencies, i.e. can comfortably live at the end, and so converting to a manual unwind would introduce churn and code without providing any immediate advantage. Link: https://patch.msgid.link/20251016190643.80529-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/svm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 153c12dbf3eb..efc3a7adebef 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5386,12 +5386,6 @@ static __init int svm_hardware_setup(void) svm_hv_hardware_setup(); - for_each_possible_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err; - } - enable_apicv = avic_hardware_setup(); if (!enable_apicv) { enable_ipiv = false; @@ -5435,6 +5429,13 @@ static __init int svm_hardware_setup(void) svm_set_cpu_caps(); kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; + + for_each_possible_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } + return 0; err: From adc6ae9729719be5e74219aaafb95e60a9e9950e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 16 Oct 2025 12:06:42 -0700 Subject: [PATCH 0698/1075] KVM: SVM: Unregister KVM's GALog notifier on kvm-amd.ko exit Unregister the GALog notifier (used to get notified of wake events for blocking vCPUs) on kvm-amd.ko exit so that a KVM or IOMMU driver bug that results in a spurious GALog event "only" results in a spurious IRQ, and doesn't trigger a use-after-free due to executing unloaded module code. Fixes: 5881f73757cc ("svm: Introduce AMD IOMMU avic_ga_log_notifier") Reported-by: Hou Wenlong Closes: https://lore.kernel.org/all/20250918130320.GA119526@k08j02272.eu95sqa Link: https://patch.msgid.link/20251016190643.80529-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 6 ++++++ arch/x86/kvm/svm/svm.c | 2 ++ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 9 insertions(+) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index f286b5706d7c..3ab74f2bd584 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -1243,3 +1243,9 @@ bool __init avic_hardware_setup(void) return true; } + +void avic_hardware_unsetup(void) +{ + if (avic) + amd_iommu_register_ga_log_notifier(NULL); +} diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index efc3a7adebef..76055c0ba177 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -921,6 +921,8 @@ static void svm_hardware_unsetup(void) { int cpu; + avic_hardware_unsetup(); + sev_hardware_unsetup(); for_each_possible_cpu(cpu) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e4b04f435b3d..b0fe40c21728 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -805,6 +805,7 @@ extern struct kvm_x86_nested_ops svm_nested_ops; ) bool __init avic_hardware_setup(void); +void avic_hardware_unsetup(void); int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); From aaac099459f932b9dbaf85ca2a7251633cc213d0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 16 Oct 2025 12:06:43 -0700 Subject: [PATCH 0699/1075] KVM: SVM: Make avic_ga_log_notifier() local to avic.c Make amd_iommu_register_ga_log_notifier() a local symbol now that it's defined and used purely within avic.c. No functional change intended. Fixes: 4bdec12aa8d6 ("KVM: SVM: Detect X2APIC virtualization (x2AVIC) support") Link: https://patch.msgid.link/20251016190643.80529-4-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 2 +- arch/x86/kvm/svm/svm.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 3ab74f2bd584..89864fee6e83 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -216,7 +216,7 @@ static void avic_deactivate_vmcb(struct vcpu_svm *svm) * This function is called from IOMMU driver to notify * SVM to schedule in a particular vCPU of a particular VM. */ -int avic_ga_log_notifier(u32 ga_tag) +static int avic_ga_log_notifier(u32 ga_tag) { unsigned long flags; struct kvm_svm *kvm_svm; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index b0fe40c21728..8c36ee0d67ef 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -806,7 +806,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; bool __init avic_hardware_setup(void); void avic_hardware_unsetup(void); -int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb); From fd92bd3b4445342e55f2c541c577796e0c3f1b8a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 30 Oct 2025 15:41:30 -0400 Subject: [PATCH 0700/1075] KVM: SVM: switch to raw spinlock for svm->ir_list_lock Use a raw spinlock for vcpu_svm.ir_list_lock as the lock can be taken during schedule() via kvm_sched_out() => __avic_vcpu_put(), and "normal" spinlocks are sleepable locks when PREEMPT_RT=y. This fixes the following lockdep warning: ============================= [ BUG: Invalid wait context ] 6.12.0-146.1640_2124176644.el10.x86_64+debug #1 Not tainted ----------------------------- qemu-kvm/38299 is trying to lock: ff11000239725600 (&svm->ir_list_lock){....}-{3:3}, at: __avic_vcpu_put+0xfd/0x300 [kvm_amd] other info that might help us debug this: context-{5:5} 2 locks held by qemu-kvm/38299: #0: ff11000239723ba8 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x240/0xe00 [kvm] #1: ff11000b906056d8 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x2e/0x130 stack backtrace: CPU: 1 UID: 0 PID: 38299 Comm: qemu-kvm Kdump: loaded Not tainted 6.12.0-146.1640_2124176644.el10.x86_64+debug #1 PREEMPT(voluntary) Hardware name: AMD Corporation QUARTZ/QUARTZ, BIOS RQZ100AB 09/14/2023 Call Trace: dump_stack_lvl+0x6f/0xb0 __lock_acquire+0x921/0xb80 lock_acquire.part.0+0xbe/0x270 _raw_spin_lock_irqsave+0x46/0x90 __avic_vcpu_put+0xfd/0x300 [kvm_amd] svm_vcpu_put+0xfa/0x130 [kvm_amd] kvm_arch_vcpu_put+0x48c/0x790 [kvm] kvm_sched_out+0x161/0x1c0 [kvm] prepare_task_switch+0x36b/0xf60 __schedule+0x4f7/0x1890 schedule+0xd4/0x260 xfer_to_guest_mode_handle_work+0x54/0xc0 vcpu_run+0x69a/0xa70 [kvm] kvm_arch_vcpu_ioctl_run+0xdc0/0x17e0 [kvm] kvm_vcpu_ioctl+0x39f/0xe00 [kvm] Signed-off-by: Maxim Levitsky Link: https://patch.msgid.link/20251030194130.307900-1-mlevitsk@redhat.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- arch/x86/kvm/svm/avic.c | 16 ++++++++-------- arch/x86/kvm/svm/svm.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 89864fee6e83..fef00546c885 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -788,7 +788,7 @@ int avic_init_vcpu(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; INIT_LIST_HEAD(&svm->ir_list); - spin_lock_init(&svm->ir_list_lock); + raw_spin_lock_init(&svm->ir_list_lock); if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) return 0; @@ -816,9 +816,9 @@ static void svm_ir_list_del(struct kvm_kernel_irqfd *irqfd) if (!vcpu) return; - spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); list_del(&irqfd->vcpu_list); - spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); } int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, @@ -855,7 +855,7 @@ int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, * list of IRQs being posted to the vCPU, to ensure the IRTE * isn't programmed with stale pCPU/IsRunning information. */ - guard(spinlock_irqsave)(&svm->ir_list_lock); + guard(raw_spinlock_irqsave)(&svm->ir_list_lock); /* * Update the target pCPU for IOMMU doorbells if the vCPU is @@ -972,7 +972,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, * up-to-date entry information, or that this task will wait until * svm_ir_list_add() completes to set the new target pCPU. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); entry = svm->avic_physical_id_entry; WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); @@ -997,7 +997,7 @@ static void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu, avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action); - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -1035,7 +1035,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) * or that this task will wait until svm_ir_list_add() completes to * mark the vCPU as not running. */ - spin_lock_irqsave(&svm->ir_list_lock, flags); + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); avic_update_iommu_vcpu_affinity(vcpu, -1, action); @@ -1059,7 +1059,7 @@ static void __avic_vcpu_put(struct kvm_vcpu *vcpu, enum avic_vcpu_action action) svm->avic_physical_id_entry = entry; - spin_unlock_irqrestore(&svm->ir_list_lock, flags); + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); } void avic_vcpu_put(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 8c36ee0d67ef..c856d8e0f95e 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -329,7 +329,7 @@ struct vcpu_svm { * back into remapped mode). */ struct list_head ir_list; - spinlock_t ir_list_lock; + raw_spinlock_t ir_list_lock; struct vcpu_sev_es_state sev_es; From ae431059e75d36170a5ae6b44cc4d06d43613215 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 3 Nov 2025 17:12:05 -0800 Subject: [PATCH 0701/1075] KVM: guest_memfd: Remove bindings on memslot deletion when gmem is dying When unbinding a memslot from a guest_memfd instance, remove the bindings even if the guest_memfd file is dying, i.e. even if its file refcount has gone to zero. If the memslot is freed before the file is fully released, nullifying the memslot side of the binding in kvm_gmem_release() will write to freed memory, as detected by syzbot+KASAN: ================================================================== BUG: KASAN: slab-use-after-free in kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353 Write of size 8 at addr ffff88807befa508 by task syz.0.17/6022 CPU: 0 UID: 0 PID: 6022 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 kvm_gmem_release+0x176/0x440 virt/kvm/guest_memfd.c:353 __fput+0x44c/0xa70 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xe9/0x130 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0xfa0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbeeff8efc9 Allocated by task 6023: kasan_save_stack mm/kasan/common.c:56 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:77 poison_kmalloc_redzone mm/kasan/common.c:397 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:414 kasan_kmalloc include/linux/kasan.h:262 [inline] __kmalloc_cache_noprof+0x3e2/0x700 mm/slub.c:5758 kmalloc_noprof include/linux/slab.h:957 [inline] kzalloc_noprof include/linux/slab.h:1094 [inline] kvm_set_memory_region+0x747/0xb90 virt/kvm/kvm_main.c:2104 kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154 kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6023: kasan_save_stack mm/kasan/common.c:56 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:77 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:584 poison_slab_object mm/kasan/common.c:252 [inline] __kasan_slab_free+0x5c/0x80 mm/kasan/common.c:284 kasan_slab_free include/linux/kasan.h:234 [inline] slab_free_hook mm/slub.c:2533 [inline] slab_free mm/slub.c:6622 [inline] kfree+0x19a/0x6d0 mm/slub.c:6829 kvm_set_memory_region+0x9c4/0xb90 virt/kvm/kvm_main.c:2130 kvm_vm_ioctl_set_memory_region+0x6f/0xd0 virt/kvm/kvm_main.c:2154 kvm_vm_ioctl+0x957/0xc60 virt/kvm/kvm_main.c:5201 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:597 [inline] __se_sys_ioctl+0xfc/0x170 fs/ioctl.c:583 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Deliberately don't acquire filemap invalid lock when the file is dying as the lifecycle of f_mapping is outside the purview of KVM. Dereferencing the mapping is *probably* fine, but there's no need to invalidate anything as memslot deletion is responsible for zapping SPTEs, and the only code that can access the dying file is kvm_gmem_release(), whose core code is mutually exclusive with unbinding. Note, the mutual exclusivity is also what makes it safe to access the bindings on a dying gmem instance. Unbinding either runs with slots_lock held, or after the last reference to the owning "struct kvm" is put, and kvm_gmem_release() nullifies the slot pointer under slots_lock, and puts its reference to the VM after that is done. Reported-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68fa7a22.a70a0220.3bf6c6.008b.GAE@google.com Tested-by: syzbot+2479e53d0db9b32ae2aa@syzkaller.appspotmail.com Fixes: a7800aa80ea4 ("KVM: Add KVM_CREATE_GUEST_MEMFD ioctl() for guest-specific backing memory") Cc: stable@vger.kernel.org Cc: Hillf Danton Reviewed-By: Vishal Annapurve Link: https://patch.msgid.link/20251104011205.3853541-1-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/guest_memfd.c | 47 +++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index fbca8c0972da..ffadc5ee8e04 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -623,24 +623,11 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, return r; } -void kvm_gmem_unbind(struct kvm_memory_slot *slot) +static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem) { unsigned long start = slot->gmem.pgoff; unsigned long end = start + slot->npages; - struct kvm_gmem *gmem; - struct file *file; - /* - * Nothing to do if the underlying file was already closed (or is being - * closed right now), kvm_gmem_release() invalidates all bindings. - */ - file = kvm_gmem_get_file(slot); - if (!file) - return; - - gmem = file->private_data; - - filemap_invalidate_lock(file->f_mapping); xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL); /* @@ -648,6 +635,38 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) * cannot see this memslot. */ WRITE_ONCE(slot->gmem.file, NULL); +} + +void kvm_gmem_unbind(struct kvm_memory_slot *slot) +{ + struct file *file; + + /* + * Nothing to do if the underlying file was _already_ closed, as + * kvm_gmem_release() invalidates and nullifies all bindings. + */ + if (!slot->gmem.file) + return; + + file = kvm_gmem_get_file(slot); + + /* + * However, if the file is _being_ closed, then the bindings need to be + * removed as kvm_gmem_release() might not run until after the memslot + * is freed. Note, modifying the bindings is safe even though the file + * is dying as kvm_gmem_release() nullifies slot->gmem.file under + * slots_lock, and only puts its reference to KVM after destroying all + * bindings. I.e. reaching this point means kvm_gmem_release() hasn't + * yet destroyed the bindings or freed the gmem_file, and can't do so + * until the caller drops slots_lock. + */ + if (!file) { + __kvm_gmem_unbind(slot, slot->gmem.file->private_data); + return; + } + + filemap_invalidate_lock(file->f_mapping); + __kvm_gmem_unbind(slot, file->private_data); filemap_invalidate_unlock(file->f_mapping); fput(file); From 2b81082ad37cc3f28355fb73a6a69b91ff7dbf20 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 3 Nov 2025 12:11:24 -0700 Subject: [PATCH 0702/1075] lib/crypto: curve25519-hacl64: Fix older clang KASAN workaround for GCC Commit 2f13daee2a72 ("lib/crypto/curve25519-hacl64: Disable KASAN with clang-17 and older") inadvertently disabled KASAN in curve25519-hacl64.o for GCC unconditionally because clang-min-version will always evaluate to nothing for GCC. Add a check for CONFIG_CC_IS_CLANG to avoid applying the workaround for GCC, which is only needed for clang-17 and older. Cc: stable@vger.kernel.org Fixes: 2f13daee2a72 ("lib/crypto/curve25519-hacl64: Disable KASAN with clang-17 and older") Signed-off-by: Nathan Chancellor Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251103-curve25519-hacl64-fix-kasan-workaround-v2-1-ab581cbd8035@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index bded351aeace..d2845b214585 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -90,7 +90,7 @@ else libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o endif # clang versions prior to 18 may blow out the stack with KASAN -ifeq ($(call clang-min-version, 180000),) +ifeq ($(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_) KASAN_SANITIZE_curve25519-hacl64.o := n endif From 44e8241c51f762aafa50ed116da68fd6ecdcc954 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Nov 2025 21:49:06 -0800 Subject: [PATCH 0703/1075] lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN On big endian arm kernels, the arm optimized Curve25519 code produces incorrect outputs and fails the Curve25519 test. This has been true ever since this code was added. It seems that hardly anyone (or even no one?) actually uses big endian arm kernels. But as long as they're ostensibly supported, we should disable this code on them so that it's not accidentally used. Note: for future-proofing, use !CPU_BIG_ENDIAN instead of CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could get removed in the future if big endian support gets dropped. Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") Cc: stable@vger.kernel.org Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 8886055e938f..16859c6226dd 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -64,7 +64,7 @@ config CRYPTO_LIB_CURVE25519 config CRYPTO_LIB_CURVE25519_ARCH bool depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN - default y if ARM && KERNEL_MODE_NEON + default y if ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN default y if PPC64 && CPU_LITTLE_ENDIAN default y if X86_64 From 789521b4717fd6bd85164ba5c131f621a79c9736 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Sat, 1 Nov 2025 10:40:11 +0100 Subject: [PATCH 0704/1075] rust: kbuild: support `-Cjump-tables=n` for Rust 1.93.0 Rust 1.93.0 (expected 2026-01-22) is stabilizing `-Zno-jump-tables` [1][2] as `-Cjump-tables=n` [3]. Without this change, one would eventually see: RUSTC L rust/core.o error: unknown unstable option: `no-jump-tables` Thus support the upcoming version. Link: https://github.com/rust-lang/rust/issues/116592 [1] Link: https://github.com/rust-lang/rust/pull/105812 [2] Link: https://github.com/rust-lang/rust/pull/145974 [3] Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross Acked-by: Nicolas Schier Link: https://patch.msgid.link/20251101094011.1024534-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- arch/loongarch/Makefile | 2 +- arch/x86/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index dc5bd3f1b8d2..96ca1a688984 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -109,7 +109,7 @@ endif ifdef CONFIG_RUSTC_HAS_ANNOTATE_TABLEJUMP KBUILD_RUSTFLAGS += -Cllvm-args=--loongarch-annotate-tablejump else -KBUILD_RUSTFLAGS += -Zno-jump-tables # keep compatibility with older compilers +KBUILD_RUSTFLAGS += $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) # keep compatibility with older compilers endif ifdef CONFIG_LTO_CLANG # The annotate-tablejump option can not be passed to LLVM backend when LTO is enabled. diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8fbff3106c56..1a27efcf3c20 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -98,7 +98,7 @@ ifeq ($(CONFIG_X86_KERNEL_IBT),y) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 # KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) -KBUILD_RUSTFLAGS += -Zcf-protection=branch -Zno-jump-tables +KBUILD_RUSTFLAGS += -Zcf-protection=branch $(if $(call rustc-min-version,109300),-Cjump-tables=n,-Zno-jump-tables) else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) endif From 90b75e12a6e831c8516498f690058d4165d5a5d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 14 Oct 2025 16:45:17 -0400 Subject: [PATCH 0705/1075] drm/amdgpu: set default gfx reset masks for gfx6-8 These were not set so soft recovery was inadvertantly disabled. Fixes: 6ac55eab4fc4 ("drm/amdgpu: move reset support type checks into the caller") Reviewed-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit 1972763505d728c604b537180727ec8132e619df) --- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 7693b7953426..80565392313f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3102,6 +3102,11 @@ static int gfx_v6_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 5976ed55d9db..2b7aba22ecc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4399,6 +4399,11 @@ static int gfx_v7_0_sw_init(struct amdgpu_ip_block *ip_block) gfx_v7_0_gpu_early_init(adev); + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0856ff65288c..8a81713d97aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -2023,6 +2023,11 @@ static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + adev->gfx.gfx_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); + adev->gfx.compute_supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + return 0; } From 37e3567dee273f68726373b342b38234bafe4cf9 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 30 Oct 2025 13:06:24 +0800 Subject: [PATCH 0706/1075] drm/amd/pm: fix missing device_attr cleanup in amdgpu_pm_sysfs_init() Use the correct label to complete all cleanup work. Fixes: 4d154b1ca580 ("drm/amd/pm: Add support for DPM policies") Fixes: 25e82f2e2c59 ("drm/amd/pm: Add temperature metrics sysfs entry") Signed-off-by: Yang Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 4c4c138a1c86775c4d47e24f26357a1f8b64d0a3) --- drivers/gpu/drm/amd/pm/amdgpu_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index b5fbb0fd1dc0..a7e6d7854b7b 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -4724,14 +4724,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) ret = devm_device_add_group(adev->dev, &amdgpu_pm_policy_attr_group); if (ret) - goto err_out0; + goto err_out1; } if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { ret = devm_device_add_group(adev->dev, &amdgpu_board_attr_group); if (ret) - goto err_out0; + goto err_out1; if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, (void *)&tmp) != -EOPNOTSUPP) { sysfs_add_file_to_group(&adev->dev->kobj, From 3362692fea915ce56345366364a501c629c9ff17 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 30 Oct 2025 14:39:43 -0500 Subject: [PATCH 0707/1075] drm/amd/display: Don't stretch non-native images by default in eDP commit 978fa2f6d0b12 ("drm/amd/display: Use scaling for non-native resolutions on eDP") started using the GPU scaler hardware to scale when a non-native resolution was picked on eDP. This scaling was done to fill the screen instead of maintain aspect ratio. The idea was supposed to be that if a different scaling behavior is preferred then the compositor would request it. The not following aspect ratio behavior however isn't desirable, so adjust it to follow aspect ratio and still try to fill screen. Note: This will lead to black bars in some cases for non-native resolutions. Compositors can request the previous behavior if desired. Fixes: 978fa2f6d0b1 ("drm/amd/display: Use scaling for non-native resolutions on eDP") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4538 Signed-off-by: Mario Limonciello (AMD) Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 825df7ff4bb1a383ad4827545e09aec60d230770) --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index bfa3199591b6..2fe6520207d6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -8030,7 +8030,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder, "mode %dx%d@%dHz is not native, enabling scaling\n", adjusted_mode->hdisplay, adjusted_mode->vdisplay, drm_mode_vrefresh(adjusted_mode)); - dm_new_connector_state->scaling = RMX_FULL; + dm_new_connector_state->scaling = RMX_ASPECT; } return 0; } From fdc93beeadc2439e5e85d056a8fe681dcced09da Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Wed, 22 Oct 2025 16:19:34 -0600 Subject: [PATCH 0708/1075] drm/amd/display: Fix black screen with HDMI outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why & How] This fixes the black screen issue on certain APUs with HDMI, accompanied by the following messages: amdgpu 0000:c4:00.0: amdgpu: [drm] Failed to setup vendor info frame on connector DP-1: -22 amdgpu 0000:c4:00.0: [drm] Cannot find any crtc or sizes [drm] Cannot find any crtc or sizes Fixes: 489f0f600ce2 ("drm/amd/display: Fix DVI-D/HDMI adapters") Suggested-by: Timur Kristóf Reviewed-by: Harry Wentland Signed-off-by: Alex Hung Signed-off-by: Ray Wu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 678c901443a6d2e909e3b51331a20f9d8f84ce82) --- drivers/gpu/drm/amd/display/dc/link/link_detection.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c index 85303167a553..1173c53359b0 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c @@ -1141,6 +1141,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, !sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; else if (dc_is_dvi_signal(sink->sink_signal) && + dc_is_dvi_signal(link->connector_signal) && aud_support->hdmi_audio_native && sink->edid_caps.edid_hdmi) sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A; From c81f5cebe849a2beeed4c8f2b06a58dfc02d5350 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 25 Oct 2025 23:29:36 -0500 Subject: [PATCH 0709/1075] drm/amdgpu: Drop PMFW RLC notifier from amdgpu_device_suspend() For S3 on vangogh, PMFW needs to be notified before the driver powers down RLC. This already happens in smu_disable_dpms() so drop the superfluous call in amdgpu_device_suspend(). Co-developed-by: Mario Limonciello (AMD) Reviewed-by: Alex Deucher Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Alex Deucher (cherry picked from commit 960e30a61e1a7ca5341a6cf9481e770e1cda24aa) --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- drivers/gpu/drm/amd/pm/amdgpu_dpm.c | 18 ------------------ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h | 2 -- 3 files changed, 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3d032c4e2dce..2819aceaab74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5243,10 +5243,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); - r = amdgpu_dpm_notify_rlc_state(adev, false); - if (r) - return r; - return 0; } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 518d07afc7df..bc29a923fa6e 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -195,24 +195,6 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, return ret; } -int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) -{ - int ret = 0; - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - - if (pp_funcs && pp_funcs->notify_rlc_state) { - mutex_lock(&adev->pm.mutex); - - ret = pp_funcs->notify_rlc_state( - adev->powerplay.pp_handle, - en); - - mutex_unlock(&adev->pm.mutex); - } - - return ret; -} - int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) { const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 65c1d98af26c..af48aead12f7 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -424,8 +424,6 @@ int amdgpu_dpm_mode1_reset(struct amdgpu_device *adev); int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, enum pp_mp1_state mp1_state); -int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); - int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); int amdgpu_dpm_baco_exit(struct amdgpu_device *adev); From 7c5609b72bfe57d8c601d9561e0d2551b605c017 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 24 Oct 2025 13:08:11 -0400 Subject: [PATCH 0710/1075] drm/amdgpu/smu: Handle S0ix for vangogh Fix the flows for S0ix. There is no need to stop rlc or reintialize PMFW in S0ix. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4659 Reviewed-by: Mario Limonciello Reported-by: Antheas Kapenekakis Tested-by: Antheas Kapenekakis Signed-off-by: Alex Deucher (cherry picked from commit fd39b5a5830d8f2553e0c09d4d50bdff28b10080) Cc: # c81f5cebe849: drm/amdgpu: Drop PMFW RLC notifier from amdgpu_device_suspend() Cc: --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 ++++++ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 3 +++ 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index fb8086859857..244b8c364d45 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2040,6 +2040,12 @@ static int smu_disable_dpms(struct smu_context *smu) smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) return 0; + /* vangogh s0ix */ + if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 2)) && + adev->in_s0ix) + return 0; + /* * For gpu reset, runpm and hibernation through BACO, * BACO feature has to be kept enabled. diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 2c9869feba61..0708d0f0938b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -2217,6 +2217,9 @@ static int vangogh_post_smu_init(struct smu_context *smu) uint32_t total_cu = adev->gfx.config.max_cu_per_sh * adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; + if (adev->in_s0ix) + return 0; + /* allow message will be sent after enable message on Vangogh*/ if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { From 597eb70f7ff7551ff795cd51754b81aabedab67b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 31 Oct 2025 10:50:02 -0400 Subject: [PATCH 0711/1075] drm/amdkfd: Don't clear PT after process killed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If process is killed. the vm entity is stopped, submit pt update job will trigger the error message "*ERROR* Trying to push to a killed entity", job will not execute. Suggested-by: Christian König Signed-off-by: Philip Yang Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 10c382ec6c6d1e11975a11962bec21cba6360391) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a2ca9acf8c4e..923f0fa7350c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1267,6 +1267,10 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + /* VM entity stopped if process killed, don't clear freed pt bo */ + if (!amdgpu_vm_ready(vm)) + return 0; + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); From 6dd97ceb645c08aca9fc871a3006e47fe699f0ac Mon Sep 17 00:00:00 2001 From: Rong Zhang Date: Tue, 14 Oct 2025 00:47:35 +0800 Subject: [PATCH 0712/1075] drm/amd/display: Fix NULL deref in debugfs odm_combine_segments When a connector is connected but inactive (e.g., disabled by desktop environments), pipe_ctx->stream_res.tg will be destroyed. Then, reading odm_combine_segments causes kernel NULL pointer dereference. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP NOPTI CPU: 16 UID: 0 PID: 26474 Comm: cat Not tainted 6.17.0+ #2 PREEMPT(lazy) e6a17af9ee6db7c63e9d90dbe5b28ccab67520c6 Hardware name: LENOVO 21Q4/LNVNB161216, BIOS PXCN25WW 03/27/2025 RIP: 0010:odm_combine_segments_show+0x93/0xf0 [amdgpu] Code: 41 83 b8 b0 00 00 00 01 75 6e 48 98 ba a1 ff ff ff 48 c1 e0 0c 48 8d 8c 07 d8 02 00 00 48 85 c9 74 2d 48 8b bc 07 f0 08 00 00 <48> 8b 07 48 8b 80 08 02 00> RSP: 0018:ffffd1bf4b953c58 EFLAGS: 00010286 RAX: 0000000000005000 RBX: ffff8e35976b02d0 RCX: ffff8e3aeed052d8 RDX: 00000000ffffffa1 RSI: ffff8e35a3120800 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff8e3580eb0000 R09: ffff8e35976b02d0 R10: ffffd1bf4b953c78 R11: 0000000000000000 R12: ffffd1bf4b953d08 R13: 0000000000040000 R14: 0000000000000001 R15: 0000000000000001 FS: 00007f44d3f9f740(0000) GS:ffff8e3caa47f000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000006485c2000 CR4: 0000000000f50ef0 PKRU: 55555554 Call Trace: seq_read_iter+0x125/0x490 ? __alloc_frozen_pages_noprof+0x18f/0x350 seq_read+0x12c/0x170 full_proxy_read+0x51/0x80 vfs_read+0xbc/0x390 ? __handle_mm_fault+0xa46/0xef0 ? do_syscall_64+0x71/0x900 ksys_read+0x73/0xf0 do_syscall_64+0x71/0x900 ? count_memcg_events+0xc2/0x190 ? handle_mm_fault+0x1d7/0x2d0 ? do_user_addr_fault+0x21a/0x690 ? exc_page_fault+0x7e/0x1a0 entry_SYSCALL_64_after_hwframe+0x6c/0x74 RIP: 0033:0x7f44d4031687 Code: 48 89 fa 4c 89 df e8 58 b3 00 00 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 1a 5b c3 0f 1f 84 00 00 00 00 00 48 8b 44 24 10 0f 05 <5b> c3 0f 1f 80 00 00 00 00> RSP: 002b:00007ffdb4b5f0b0 EFLAGS: 00000202 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 00007f44d3f9f740 RCX: 00007f44d4031687 RDX: 0000000000040000 RSI: 00007f44d3f5e000 RDI: 0000000000000003 RBP: 0000000000040000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00007f44d3f5e000 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000040000 Modules linked in: tls tcp_diag inet_diag xt_mark ccm snd_hrtimer snd_seq_dummy snd_seq_midi snd_seq_oss snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device x> snd_hda_codec_atihdmi snd_hda_codec_realtek_lib lenovo_wmi_helpers think_lmi snd_hda_codec_generic snd_hda_codec_hdmi snd_soc_core kvm snd_compress uvcvideo sn> platform_profile joydev amd_pmc mousedev mac_hid sch_fq_codel uinput i2c_dev parport_pc ppdev lp parport nvme_fabrics loop nfnetlink ip_tables x_tables dm_cryp> CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:odm_combine_segments_show+0x93/0xf0 [amdgpu] Code: 41 83 b8 b0 00 00 00 01 75 6e 48 98 ba a1 ff ff ff 48 c1 e0 0c 48 8d 8c 07 d8 02 00 00 48 85 c9 74 2d 48 8b bc 07 f0 08 00 00 <48> 8b 07 48 8b 80 08 02 00> RSP: 0018:ffffd1bf4b953c58 EFLAGS: 00010286 RAX: 0000000000005000 RBX: ffff8e35976b02d0 RCX: ffff8e3aeed052d8 RDX: 00000000ffffffa1 RSI: ffff8e35a3120800 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff8e3580eb0000 R09: ffff8e35976b02d0 R10: ffffd1bf4b953c78 R11: 0000000000000000 R12: ffffd1bf4b953d08 R13: 0000000000040000 R14: 0000000000000001 R15: 0000000000000001 FS: 00007f44d3f9f740(0000) GS:ffff8e3caa47f000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000006485c2000 CR4: 0000000000f50ef0 PKRU: 55555554 Fix this by checking pipe_ctx->stream_res.tg before dereferencing. Fixes: 07926ba8a44f ("drm/amd/display: Add debugfs interface for ODM combine info") Signed-off-by: Rong Zhang Reviewed-by: Mario Limoncello Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher (cherry picked from commit f19bbecd34e3c15eed7e5e593db2ac0fc7a0e6d8) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index f263e1a4537e..00dac862b665 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -1302,7 +1302,8 @@ static int odm_combine_segments_show(struct seq_file *m, void *unused) if (connector->status != connector_status_connected) return -ENODEV; - if (pipe_ctx != NULL && pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) + if (pipe_ctx && pipe_ctx->stream_res.tg && + pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments(pipe_ctx->stream_res.tg, &segments); seq_printf(m, "%d\n", segments); From b540de9e3b4fab3b9e10f30714a6f5c1b2a50ec3 Mon Sep 17 00:00:00 2001 From: Shuhao Fu Date: Tue, 4 Nov 2025 23:13:15 +0800 Subject: [PATCH 0713/1075] smb: client: fix refcount leak in smb2_set_path_attr Fix refcount leak in `smb2_set_path_attr` when path conversion fails. Function `cifs_get_writable_path` returns `cfile` with its reference counter `cfile->count` increased on success. Function `smb2_compound_op` would decrease the reference counter for `cfile`, as stated in its comment. By calling `smb2_rename_path`, the reference counter of `cfile` would leak if `cifs_convert_path_to_utf16` fails in `smb2_set_path_attr`. Fixes: 8de9e86c67ba ("cifs: create a helper to find a writeable handle by path name") Acked-by: Henrique Carvalho Signed-off-by: Shuhao Fu Signed-off-by: Steve French --- fs/smb/client/smb2inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 09e3fc81d7cb..69cb81fa0d3a 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1294,6 +1294,8 @@ static int smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon, smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); if (smb2_to_name == NULL) { rc = -ENOMEM; + if (cfile) + cifsFileInfo_put(cfile); goto smb2_rename_path; } in_iov.iov_base = smb2_to_name; From 1e39da974ce621ed874c6d3aaf65ad14848c9f0d Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 16:36:42 -0800 Subject: [PATCH 0714/1075] fscrypt: fix left shift underflow when inode->i_blkbits > PAGE_SHIFT When simulating an nvme device on qemu with both logical_block_size and physical_block_size set to 8 KiB, an error trace appears during partition table reading at boot time. The issue is caused by inode->i_blkbits being larger than PAGE_SHIFT, which leads to a left shift of -1 and triggering a UBSAN warning. [ 2.697306] ------------[ cut here ]------------ [ 2.697309] UBSAN: shift-out-of-bounds in fs/crypto/inline_crypt.c:336:37 [ 2.697311] shift exponent -1 is negative [ 2.697315] CPU: 3 UID: 0 PID: 274 Comm: (udev-worker) Not tainted 6.18.0-rc2+ #34 PREEMPT(voluntary) [ 2.697317] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [ 2.697320] Call Trace: [ 2.697324] [ 2.697325] dump_stack_lvl+0x76/0xa0 [ 2.697340] dump_stack+0x10/0x20 [ 2.697342] __ubsan_handle_shift_out_of_bounds+0x1e3/0x390 [ 2.697351] bh_get_inode_and_lblk_num.cold+0x12/0x94 [ 2.697359] fscrypt_set_bio_crypt_ctx_bh+0x44/0x90 [ 2.697365] submit_bh_wbc+0xb6/0x190 [ 2.697370] block_read_full_folio+0x194/0x270 [ 2.697371] ? __pfx_blkdev_get_block+0x10/0x10 [ 2.697375] ? __pfx_blkdev_read_folio+0x10/0x10 [ 2.697377] blkdev_read_folio+0x18/0x30 [ 2.697379] filemap_read_folio+0x40/0xe0 [ 2.697382] filemap_get_pages+0x5ef/0x7a0 [ 2.697385] ? mmap_region+0x63/0xd0 [ 2.697389] filemap_read+0x11d/0x520 [ 2.697392] blkdev_read_iter+0x7c/0x180 [ 2.697393] vfs_read+0x261/0x390 [ 2.697397] ksys_read+0x71/0xf0 [ 2.697398] __x64_sys_read+0x19/0x30 [ 2.697399] x64_sys_call+0x1e88/0x26a0 [ 2.697405] do_syscall_64+0x80/0x670 [ 2.697410] ? __x64_sys_newfstat+0x15/0x20 [ 2.697414] ? x64_sys_call+0x204a/0x26a0 [ 2.697415] ? do_syscall_64+0xb8/0x670 [ 2.697417] ? irqentry_exit_to_user_mode+0x2e/0x2a0 [ 2.697420] ? irqentry_exit+0x43/0x50 [ 2.697421] ? exc_page_fault+0x90/0x1b0 [ 2.697422] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 2.697425] RIP: 0033:0x75054cba4a06 [ 2.697426] Code: 5d e8 41 8b 93 08 03 00 00 59 5e 48 83 f8 fc 75 19 83 e2 39 83 fa 08 75 11 e8 26 ff ff ff 66 0f 1f 44 00 00 48 8b 45 10 0f 05 <48> 8b 5d f8 c9 c3 0f 1f 40 00 f3 0f 1e fa 55 48 89 e5 48 83 ec 08 [ 2.697427] RSP: 002b:00007fff973723a0 EFLAGS: 00000202 ORIG_RAX: 0000000000000000 [ 2.697430] RAX: ffffffffffffffda RBX: 00005ea9a2c02760 RCX: 000075054cba4a06 [ 2.697432] RDX: 0000000000002000 RSI: 000075054c190000 RDI: 000000000000001b [ 2.697433] RBP: 00007fff973723c0 R08: 0000000000000000 R09: 0000000000000000 [ 2.697434] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000 [ 2.697434] R13: 00005ea9a2c027c0 R14: 00005ea9a2be5608 R15: 00005ea9a2be55f0 [ 2.697436] [ 2.697436] ---[ end trace ]--- This situation can happen for block devices because when CONFIG_TRANSPARENT_HUGEPAGE is enabled, the maximum logical_block_size is 64 KiB. set_init_blocksize() then sets the block device inode->i_blkbits to 13, which is within this limit. File I/O does not trigger this problem because for filesystems that do not support the FS_LBS feature, sb_set_blocksize() prevents sb->s_blocksize_bits from being larger than PAGE_SHIFT. During inode allocation, alloc_inode()->inode_init_always() assigns inode->i_blkbits from sb->s_blocksize_bits. Currently, only xfs_fs_type has the FS_LBS flag, and since xfs I/O paths do not reach submit_bh_wbc(), it does not hit the left-shift underflow issue. Signed-off-by: Yongpeng Yang Fixes: 47dd67532303 ("block/bdev: lift block size restrictions to 64k") Cc: stable@vger.kernel.org [EB: use folio_pos() and consolidate the two shifts by i_blkbits] Link: https://lore.kernel.org/r/20251105003642.42796-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/crypto/inline_crypt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 5dee7c498bc8..ed6e926226b5 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -333,8 +333,7 @@ static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh, inode = mapping->host; *inode_ret = inode; - *lblk_num_ret = ((u64)folio->index << (PAGE_SHIFT - inode->i_blkbits)) + - (bh_offset(bh) >> inode->i_blkbits); + *lblk_num_ret = (folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits; return true; } From c3838262b824c71c145cd3668722e99a69bc9cd9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 31 Oct 2025 14:05:51 +0800 Subject: [PATCH 0715/1075] virtio_net: fix alignment for virtio_net_hdr_v1_hash Changing alignment of header would mean it's no longer safe to cast a 2 byte aligned pointer between formats. Use two 16 bit fields to make it 2 byte aligned as previously. This fixes the performance regression since commit ("virtio_net: enable gso over UDP tunnel support.") as it uses virtio_net_hdr_v1_hash_tunnel which embeds virtio_net_hdr_v1_hash. Pktgen in guest + XDP_DROP on TAP + vhost_net shows the TX PPS is recovered from 2.4Mpps to 4.45Mpps. Fixes: 56a06bd40fab ("virtio_net: enable gso over UDP tunnel support.") Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Jason Wang Tested-by: Lei Yang Link: https://patch.msgid.link/20251031060551.126-1-jasowang@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 15 +++++++++++++-- include/linux/virtio_net.h | 3 ++- include/uapi/linux/virtio_net.h | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8e8a179aaa49..e6e650bc3bc3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2539,6 +2539,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, return NULL; } +static inline u32 +virtio_net_hash_value(const struct virtio_net_hdr_v1_hash *hdr_hash) +{ + return __le16_to_cpu(hdr_hash->hash_value_lo) | + (__le16_to_cpu(hdr_hash->hash_value_hi) << 16); +} + static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, struct sk_buff *skb) { @@ -2565,7 +2572,7 @@ static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, default: rss_hash_type = PKT_HASH_TYPE_NONE; } - skb_set_hash(skb, __le32_to_cpu(hdr_hash->hash_value), rss_hash_type); + skb_set_hash(skb, virtio_net_hash_value(hdr_hash), rss_hash_type); } static void virtnet_receive_done(struct virtnet_info *vi, struct receive_queue *rq, @@ -3311,6 +3318,10 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb, bool orphan) pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest); + /* Make sure it's safe to cast between formats */ + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr)); + BUILD_BUG_ON(__alignof__(*hdr) != __alignof__(hdr->hash_hdr.hdr)); + can_push = vi->any_header_sg && !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) && !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len; @@ -6750,7 +6761,7 @@ static int virtnet_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash, hash_report = VIRTIO_NET_HASH_REPORT_NONE; *rss_type = virtnet_xdp_rss_type[hash_report]; - *hash = __le32_to_cpu(hdr_hash->hash_value); + *hash = virtio_net_hash_value(hdr_hash); return 0; } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 4d1780848d0e..b673c31569f3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -401,7 +401,8 @@ virtio_net_hdr_tnl_from_skb(const struct sk_buff *skb, if (!tnl_hdr_negotiated) return -EINVAL; - vhdr->hash_hdr.hash_value = 0; + vhdr->hash_hdr.hash_value_lo = 0; + vhdr->hash_hdr.hash_value_hi = 0; vhdr->hash_hdr.hash_report = 0; vhdr->hash_hdr.padding = 0; diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 8bf27ab8bcb4..1db45b01532b 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -193,7 +193,8 @@ struct virtio_net_hdr_v1 { struct virtio_net_hdr_v1_hash { struct virtio_net_hdr_v1 hdr; - __le32 hash_value; + __le16 hash_value_lo; + __le16 hash_value_hi; #define VIRTIO_NET_HASH_REPORT_NONE 0 #define VIRTIO_NET_HASH_REPORT_IPv4 1 #define VIRTIO_NET_HASH_REPORT_TCPv4 2 From 284922f4c563aa3a8558a00f2a05722133237fe8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Nov 2025 15:25:20 +0900 Subject: [PATCH 0716/1075] x86: uaccess: don't use runtime-const rewriting in modules The runtime-const infrastructure was never designed to handle the modular case, because the constant fixup is only done at boot time for core kernel code. But by the time I used it for the x86-64 user space limit handling in commit 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue"), I had completely repressed that fact. And it all happens to work because the only code that currently actually gets inlined by modules is for the access_ok() limit check, where the default constant value works even when not fixed up. Because at least I had intentionally made it be something that is in the non-canonical address space region. But it's technically very wrong, and it does mean that at least in theory, the use of 'access_ok()' + '__get_user()' can trigger the same speculation issue with non-canonical addresses that the original commit was all about. The pattern is unusual enough that this probably doesn't matter in practice, but very wrong is still very wrong. Also, let's fix it before the nice optimized scoped user accessor helpers that Thomas Gleixner is working on cause this pseudo-constant to then be more widely used. This all came up due to an unrelated discussion with Mateusz Guzik about using the runtime const infrastructure for names_cachep accesses too. There the modular case was much more obviously broken, and Mateusz noted it in his 'v2' of the patch series. That then made me notice how broken 'access_ok()' had been in modules all along. Mea culpa, mea maxima culpa. Fix it by simply not using the runtime-const code in modules, and just using the USER_PTR_MAX variable value instead. This is not performance-critical like the core user accessor functions (get_user() and friends) are. Also make sure this doesn't get forgotten the next time somebody wants to do runtime constant optimizations by having the x86 runtime-const.h header file error out if included by modules. Fixes: 86e6b1547b3d ("x86: fix user address masking non-canonical speculation issue") Acked-by: Borislav Petkov Acked-by: Sean Christopherson Cc: Thomas Gleixner Triggered-by: Mateusz Guzik Link: https://lore.kernel.org/all/20251030105242.801528-1-mjguzik@gmail.com/ Signed-off-by: Linus Torvalds --- arch/x86/include/asm/runtime-const.h | 4 ++++ arch/x86/include/asm/uaccess_64.h | 10 +++++----- arch/x86/kernel/cpu/common.c | 6 +++++- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h index 8d983cfd06ea..e5a13dc8816e 100644 --- a/arch/x86/include/asm/runtime-const.h +++ b/arch/x86/include/asm/runtime-const.h @@ -2,6 +2,10 @@ #ifndef _ASM_RUNTIME_CONST_H #define _ASM_RUNTIME_CONST_H +#ifdef MODULE + #error "Cannot use runtime-const infrastructure from modules" +#endif + #ifdef __ASSEMBLY__ .macro RUNTIME_CONST_PTR sym reg diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index c8a5ae35c871..641f45c22f9d 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -12,12 +12,12 @@ #include #include #include -#include -/* - * Virtual variable: there's no actual backing store for this, - * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)' - */ +#ifdef MODULE + #define runtime_const_ptr(sym) (sym) +#else + #include +#endif extern unsigned long USER_PTR_MAX; #ifdef CONFIG_ADDRESS_MASKING diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7d3512914ca..02d97834a1d4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -78,6 +78,10 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); +/* Used for modules: built-in code uses runtime constants */ +unsigned long USER_PTR_MAX; +EXPORT_SYMBOL(USER_PTR_MAX); + u32 elf_hwcap2 __read_mostly; /* Number of siblings per CPU package */ @@ -2579,7 +2583,7 @@ void __init arch_cpu_finalize_init(void) alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { - unsigned long USER_PTR_MAX = TASK_SIZE_MAX; + USER_PTR_MAX = TASK_SIZE_MAX; /* * Enable this when LAM is gated on LASS support From bc7208ca805ae6062f353a4753467d913d963bc6 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 3 Nov 2025 16:56:55 -0800 Subject: [PATCH 0717/1075] bnxt_en: Shutdown FW DMA in bnxt_shutdown() The netif_close() call in bnxt_shutdown() only stops packet DMA. There may be FW DMA for trace logging (recently added) that will continue. If we kexec to a new kernel, the DMA will corrupt memory in the new kernel. Add bnxt_hwrm_func_drv_unrgtr() to unregister the driver from the FW. This will stop the FW DMA. In case the call fails, call pcie_flr() to reset the function and stop the DMA. Fixes: 24d694aec139 ("bnxt_en: Allocate backing store memory for FW trace logs") Reported-by: Jakub Kicinski Reviewed-by: Damodharam Ammepalli Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 3fc33b1b4dfb..c0e9caa1df73 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -16892,6 +16892,10 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (netif_running(dev)) netif_close(dev); + if (bnxt_hwrm_func_drv_unrgtr(bp)) { + pcie_flr(pdev); + goto shutdown_exit; + } bnxt_ptp_clear(bp); bnxt_clear_int_mode(bp); pci_disable_device(pdev); From deb8eb39164382f1f67ef8e8af9176baf5e10f2d Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Mon, 3 Nov 2025 16:56:56 -0800 Subject: [PATCH 0718/1075] bnxt_en: Fix a possible memory leak in bnxt_ptp_init In bnxt_ptp_init(), when ptp_clock_register() fails, the driver is not freeing the memory allocated for ptp_info->pin_config. Fix it to unconditionally free ptp_info->pin_config in bnxt_ptp_free(). Fixes: caf3eedbcd8d ("bnxt_en: 1PPS support for 5750X family chips") Reviewed-by: Pavan Chebbi Reviewed-by: Somnath Kotur Signed-off-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index db81cf6d5289..0abaa2bbe357 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -1051,9 +1051,9 @@ static void bnxt_ptp_free(struct bnxt *bp) if (ptp->ptp_clock) { ptp_clock_unregister(ptp->ptp_clock); ptp->ptp_clock = NULL; - kfree(ptp->ptp_info.pin_config); - ptp->ptp_info.pin_config = NULL; } + kfree(ptp->ptp_info.pin_config); + ptp->ptp_info.pin_config = NULL; } int bnxt_ptp_init(struct bnxt *bp) From ff02be05f78399c766be68ab0b2285ff90b2aaa8 Mon Sep 17 00:00:00 2001 From: Gautam R A Date: Mon, 3 Nov 2025 16:56:57 -0800 Subject: [PATCH 0719/1075] bnxt_en: Fix null pointer dereference in bnxt_bs_trace_check_wrap() With older FW, we may get the ASYNC_EVENT_CMPL_EVENT_ID_DBG_BUF_PRODUCER for FW trace data type that has not been initialized. This will result in a crash in bnxt_bs_trace_type_wrap(). Add a guard to check for a valid magic_byte pointer before proceeding. Fixes: 84fcd9449fd7 ("bnxt_en: Manage the FW trace context memory") Reviewed-by: Somnath Kotur Reviewed-by: Shruti Parab Signed-off-by: Gautam R A Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 741b2d854789..7df46a21dd18 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2149,7 +2149,7 @@ struct bnxt_bs_trace_info { static inline void bnxt_bs_trace_check_wrap(struct bnxt_bs_trace_info *bs_trace, u32 offset) { - if (!bs_trace->wrapped && + if (!bs_trace->wrapped && bs_trace->magic_byte && *bs_trace->magic_byte != BNXT_TRACE_BUF_MAGIC_BYTE) bs_trace->wrapped = 1; bs_trace->last_offset = offset; From 28d9a84ef0ce56cc623da2a1ebf7583c00d52b31 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 3 Nov 2025 16:56:58 -0800 Subject: [PATCH 0720/1075] bnxt_en: Always provide max entry and entry size in coredump segments While populating firmware host logging segments for the coredump, it is possible for the FW command that flushes the segment to fail. When that happens, the existing code will not update the max entry and entry size in the segment header and this causes software that decodes the coredump to skip the segment. The segment most likely has already collected some DMA data, so always update these 2 segment fields in the header to allow the decoder to decode any data in the segment. Fixes: 3c2179e66355 ("bnxt_en: Add FW trace coredump segments to the coredump") Reviewed-by: Shruti Parab Signed-off-by: Kashyap Desai Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 0181ab1f2dfd..ccb8b509662d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -333,13 +333,14 @@ static void bnxt_fill_drv_seg_record(struct bnxt *bp, u32 offset = 0; int rc = 0; + record->max_entries = cpu_to_le32(ctxm->max_entries); + record->entry_size = cpu_to_le32(ctxm->entry_size); + rc = bnxt_dbg_hwrm_log_buffer_flush(bp, type, 0, &offset); if (rc) return; bnxt_bs_trace_check_wrap(bs_trace, offset); - record->max_entries = cpu_to_le32(ctxm->max_entries); - record->entry_size = cpu_to_le32(ctxm->entry_size); record->offset = cpu_to_le32(bs_trace->last_offset); record->wrapped = bs_trace->wrapped; } From 5204943a4c6efc832993c0fa17dec275071eeccc Mon Sep 17 00:00:00 2001 From: Shantiprasad Shettar Date: Mon, 3 Nov 2025 16:56:59 -0800 Subject: [PATCH 0721/1075] bnxt_en: Fix warning in bnxt_dl_reload_down() The existing code calls bnxt_cancel_reservations() after bnxt_hwrm_func_drv_unrgtr() in bnxt_dl_reload_down(). bnxt_cancel_reservations() calls the FW and it will always fail since the driver has already unregistered, triggering this warning: bnxt_en 0000:0a:00.0 ens2np0: resc_qcaps failed Fix it by calling bnxt_clear_reservations() which will skip the unnecessary FW call since we have unregistered. Fixes: 228ea8c187d8 ("bnxt_en: implement devlink dev reload driver_reinit") Reviewed-by: Mohammad Shuab Siddique Reviewed-by: Somnath Kotur Reviewed-by: Kalesh AP Signed-off-by: Shantiprasad Shettar Signed-off-by: Michael Chan Link: https://patch.msgid.link/20251104005700.542174-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c0e9caa1df73..a625e7c311dd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12439,7 +12439,7 @@ static int bnxt_try_recover_fw(struct bnxt *bp) return -ENODEV; } -static void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7df46a21dd18..3613a172483a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2941,6 +2941,7 @@ void bnxt_report_link(struct bnxt *bp); int bnxt_update_link(struct bnxt *bp, bool chng_link_state); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +void bnxt_clear_reservations(struct bnxt *bp, bool fw_reset); int bnxt_cancel_reservations(struct bnxt *bp, bool fw_reset); int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp); int bnxt_hwrm_free_wol_fltr(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 02961d93ed35..67ca02d84c97 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -461,7 +461,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change, rtnl_unlock(); break; } - bnxt_cancel_reservations(bp, false); + bnxt_clear_reservations(bp, false); bnxt_free_ctx_mem(bp, false); break; } From ea0714d61dea6e00b853a0116d0afe2b2fe70ef3 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 4 Nov 2025 22:54:25 +0000 Subject: [PATCH 0722/1075] bpf:add _impl suffix for bpf_task_work_schedule* kfuncs Rename: bpf_task_work_schedule_resume()->bpf_task_work_schedule_resume_impl() bpf_task_work_schedule_signal()->bpf_task_work_schedule_signal_impl() This aligns task work scheduling kfuncs with the established naming scheme for kfuncs with the bpf_prog_aux argument provided by the verifier implicitly. This convention will be taken advantage of with the upcoming KF_IMPLICIT_ARGS feature to preserve backwards compatibility to BPF programs. Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251104-implv2-v3-1-4772b9ae0e06@meta.com Signed-off-by: Alexei Starovoitov Acked-by: Ihor Solodrai --- kernel/bpf/helpers.c | 24 +++++++++++-------- kernel/bpf/verifier.c | 12 +++++----- tools/testing/selftests/bpf/progs/task_work.c | 6 ++--- .../selftests/bpf/progs/task_work_fail.c | 8 +++---- .../selftests/bpf/progs/task_work_stress.c | 4 ++-- 5 files changed, 29 insertions(+), 25 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index eb25e70e0bdc..33173b027ccf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4169,7 +4169,8 @@ static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work } /** - * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode + * bpf_task_work_schedule_signal_impl - Schedule BPF callback using task_work_add with TWA_SIGNAL + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4178,15 +4179,17 @@ static int bpf_task_work_schedule(struct task_struct *task, struct bpf_task_work * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_signal_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL); } /** - * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode + * bpf_task_work_schedule_resume_impl - Schedule BPF callback using task_work_add with TWA_RESUME + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4195,9 +4198,10 @@ __bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct b * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } @@ -4377,8 +4381,8 @@ BTF_ID_FLAGS(func, bpf_strnstr); BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ff40e5e65c43..8314518c8d93 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12259,8 +12259,8 @@ enum special_kfunc_type { KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, KF___bpf_trap, - KF_bpf_task_work_schedule_signal, - KF_bpf_task_work_schedule_resume, + KF_bpf_task_work_schedule_signal_impl, + KF_bpf_task_work_schedule_resume_impl, }; BTF_ID_LIST(special_kfunc_list) @@ -12331,13 +12331,13 @@ BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) BTF_ID(func, __bpf_trap) -BTF_ID(func, bpf_task_work_schedule_signal) -BTF_ID(func, bpf_task_work_schedule_resume) +BTF_ID(func, bpf_task_work_schedule_signal_impl) +BTF_ID(func, bpf_task_work_schedule_resume_impl) static bool is_task_work_add_kfunc(u32 func_id) { - return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || - func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; + return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal_impl] || + func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume_impl]; } static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c index 23217f06a3ec..663a80990f8f 100644 --- a/tools/testing/selftests/bpf/progs/task_work.c +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args) if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL); return 0; } @@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args) work = bpf_map_lookup_elem(&lrumap, &key); if (!work || work->data[0]) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c index 77fe8f28facd..1270953fd092 100644 --- a/tools/testing/selftests/bpf/progs/task_work_fail.c +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args) struct bpf_task_work tw; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL); return 0; } @@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args) struct task_struct *task; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL); return 0; } @@ -91,6 +91,6 @@ int map_null(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c index 90fca06fff56..55e555f7f41b 100644 --- a/tools/testing/selftests/bpf/progs/task_work_stress.c +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -51,8 +51,8 @@ int schedule_task_work(void *ctx) if (!work) return 0; } - err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, - process_work, NULL); + err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); if (err) __sync_fetch_and_add(&schedule_error, 1); else From 137cc92ffe2e71705fce112656a460d924934ebe Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 4 Nov 2025 22:54:26 +0000 Subject: [PATCH 0723/1075] bpf: add _impl suffix for bpf_stream_vprintk() kfunc Rename bpf_stream_vprintk() to bpf_stream_vprintk_impl(). This makes bpf_stream_vprintk() follow the already established "_impl" suffix-based naming convention for kfuncs with the bpf_prog_aux argument provided by the verifier implicitly. This convention will be taken advantage of with the upcoming KF_IMPLICIT_ARGS feature to preserve backwards compatibility to BPF programs. Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251104-implv2-v3-2-4772b9ae0e06@meta.com Signed-off-by: Alexei Starovoitov Acked-by: Ihor Solodrai --- kernel/bpf/helpers.c | 2 +- kernel/bpf/stream.c | 3 ++- .../bpftool/Documentation/bpftool-prog.rst | 2 +- tools/lib/bpf/bpf_helpers.h | 26 +++++++++---------- .../testing/selftests/bpf/progs/stream_fail.c | 6 ++--- 5 files changed, 20 insertions(+), 19 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 33173b027ccf..e4007fea4909 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4380,7 +4380,7 @@ BTF_ID_FLAGS(func, bpf_strnstr); #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS) BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif -BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c index eb6c5a21c2ef..ff16c631951b 100644 --- a/kernel/bpf/stream.c +++ b/kernel/bpf/stream.c @@ -355,7 +355,8 @@ __bpf_kfunc_start_defs(); * Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the * enum in headers. */ -__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, u32 len__sz, void *aux__prog) +__bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + u32 len__sz, void *aux__prog) { struct bpf_bprintf_data data = { .get_bin_args = true, diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 009633294b09..35aeeaf5f711 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -182,7 +182,7 @@ bpftool prog tracelog bpftool prog tracelog { stdout | stderr } *PROG* Dump the BPF stream of the program. BPF programs can write to these streams - at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write + at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write error messages to the standard error stream. This facility should be used only for debugging purposes. diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 80c028540656..d4e4e388e625 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -315,20 +315,20 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) -extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, - __u32 len__sz, void *aux__prog) __weak __ksym; +extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; -#define bpf_stream_printk(stream_id, fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \ }) /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c index b4a0d0cc8ec8..3662515f0107 100644 --- a/tools/testing/selftests/bpf/progs/stream_fail.c +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -10,7 +10,7 @@ SEC("syscall") __failure __msg("Possibly NULL pointer passed") int stream_vprintk_null_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL); return 0; } @@ -18,7 +18,7 @@ SEC("syscall") __failure __msg("R3 type=scalar expected=") int stream_vprintk_scalar_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL); return 0; } @@ -26,7 +26,7 @@ SEC("syscall") __failure __msg("arg#1 doesn't point to a const string") int stream_vprintk_string_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL); return 0; } From 0c716703965ffc5ef4311b65cb5d84a703784717 Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Thu, 30 Oct 2025 21:44:38 +0700 Subject: [PATCH 0724/1075] virtio-net: fix received length check in big packets Since commit 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets"), when guest gso is off, the allocated size for big packets is not MAX_SKB_FRAGS * PAGE_SIZE anymore but depends on negotiated MTU. The number of allocated frags for big packets is stored in vi->big_packets_num_skbfrags. Because the host announced buffer length can be malicious (e.g. the host vhost_net driver's get_rx_bufs is modified to announce incorrect length), we need a check in virtio_net receive path. Currently, the check is not adapted to the new change which can lead to NULL page pointer dereference in the below while loop when receiving length that is larger than the allocated one. This commit fixes the received length check corresponding to the new change. Fixes: 4959aebba8c0 ("virtio-net: use mtu size as buffer length for big packets") Cc: stable@vger.kernel.org Signed-off-by: Bui Quang Minh Reviewed-by: Xuan Zhuo Tested-by: Lei Yang Link: https://patch.msgid.link/20251030144438.7582-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e6e650bc3bc3..8855a994e12b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -910,17 +910,6 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, goto ok; } - /* - * Verify that we can indeed put this data into a skb. - * This is here to handle cases when the device erroneously - * tries to receive more than is possible. This is usually - * the case of a broken device. - */ - if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) { - net_dbg_ratelimited("%s: too much data\n", skb->dev->name); - dev_kfree_skb(skb); - return NULL; - } BUG_ON(offset >= PAGE_SIZE); while (len) { unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len); @@ -2112,9 +2101,19 @@ static struct sk_buff *receive_big(struct net_device *dev, struct virtnet_rq_stats *stats) { struct page *page = buf; - struct sk_buff *skb = - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); + struct sk_buff *skb; + /* Make sure that len does not exceed the size allocated in + * add_recvbuf_big. + */ + if (unlikely(len > (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE)) { + pr_debug("%s: rx error: len %u exceeds allocated size %lu\n", + dev->name, len, + (vi->big_packets_num_skbfrags + 1) * PAGE_SIZE); + goto err; + } + + skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0); u64_stats_add(&stats->bytes, len - vi->hdr_len); if (unlikely(!skb)) goto err; From 90a88306eb874fe4bbdd860e6c9787f5bbc588b5 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 3 Nov 2025 10:28:11 -0600 Subject: [PATCH 0725/1075] net: ethernet: ti: netcp: Standardize knav_dma_open_channel to return NULL on error Make knav_dma_open_channel consistently return NULL on error instead of ERR_PTR. Currently the header include/linux/soc/ti/knav_dma.h returns NULL when the driver is disabled, but the driver implementation does not even return NULL or ERR_PTR on failure, causing inconsistency in the users. This results in a crash in netcp_free_navigator_resources as followed (trimmed): Unhandled fault: alignment exception (0x221) at 0xfffffff2 [fffffff2] *pgd=80000800207003, *pmd=82ffda003, *pte=00000000 Internal error: : 221 [#1] SMP ARM Modules linked in: CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.17.0-rc7 #1 NONE Hardware name: Keystone PC is at knav_dma_close_channel+0x30/0x19c LR is at netcp_free_navigator_resources+0x2c/0x28c [... TRIM...] Call trace: knav_dma_close_channel from netcp_free_navigator_resources+0x2c/0x28c netcp_free_navigator_resources from netcp_ndo_open+0x430/0x46c netcp_ndo_open from __dev_open+0x114/0x29c __dev_open from __dev_change_flags+0x190/0x208 __dev_change_flags from netif_change_flags+0x1c/0x58 netif_change_flags from dev_change_flags+0x38/0xa0 dev_change_flags from ip_auto_config+0x2c4/0x11f0 ip_auto_config from do_one_initcall+0x58/0x200 do_one_initcall from kernel_init_freeable+0x1cc/0x238 kernel_init_freeable from kernel_init+0x1c/0x12c kernel_init from ret_from_fork+0x14/0x38 [... TRIM...] Standardize the error handling by making the function return NULL on all error conditions. The API is used in just the netcp_core.c so the impact is limited. Note, this change, in effect reverts commit 5b6cb43b4d62 ("net: ethernet: ti: netcp_core: return error while dma channel open issue"), but provides a less error prone implementation. Suggested-by: Simon Horman Suggested-by: Jacob Keller Signed-off-by: Nishanth Menon Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251103162811.3730055-1-nm@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/netcp_core.c | 10 +++++----- drivers/soc/ti/knav_dma.c | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 857820657bac..5ee13db568f0 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1338,10 +1338,10 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_channel = knav_dma_open_channel(dev, tx_pipe->dma_chan_name, &config); - if (IS_ERR(tx_pipe->dma_channel)) { + if (!tx_pipe->dma_channel) { dev_err(dev, "failed opening tx chan(%s)\n", tx_pipe->dma_chan_name); - ret = PTR_ERR(tx_pipe->dma_channel); + ret = -EINVAL; goto err; } @@ -1359,7 +1359,7 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) return 0; err: - if (!IS_ERR_OR_NULL(tx_pipe->dma_channel)) + if (tx_pipe->dma_channel) knav_dma_close_channel(tx_pipe->dma_channel); tx_pipe->dma_channel = NULL; return ret; @@ -1678,10 +1678,10 @@ static int netcp_setup_navigator_resources(struct net_device *ndev) netcp->rx_channel = knav_dma_open_channel(netcp->netcp_device->device, netcp->dma_chan_name, &config); - if (IS_ERR(netcp->rx_channel)) { + if (!netcp->rx_channel) { dev_err(netcp->ndev_dev, "failed opening rx chan(%s\n", netcp->dma_chan_name); - ret = PTR_ERR(netcp->rx_channel); + ret = -EINVAL; goto fail; } diff --git a/drivers/soc/ti/knav_dma.c b/drivers/soc/ti/knav_dma.c index a25ebe6cd503..553ae7ee20f1 100644 --- a/drivers/soc/ti/knav_dma.c +++ b/drivers/soc/ti/knav_dma.c @@ -402,7 +402,7 @@ static int of_channel_match_helper(struct device_node *np, const char *name, * @name: slave channel name * @config: dma configuration parameters * - * Returns pointer to appropriate DMA channel on success or error. + * Return: Pointer to appropriate DMA channel on success or NULL on error. */ void *knav_dma_open_channel(struct device *dev, const char *name, struct knav_dma_cfg *config) @@ -414,13 +414,13 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!kdev) { pr_err("keystone-navigator-dma driver not registered\n"); - return (void *)-EINVAL; + return NULL; } chan_num = of_channel_match_helper(dev->of_node, name, &instance); if (chan_num < 0) { dev_err(kdev->dev, "No DMA instance with name %s\n", name); - return (void *)-EINVAL; + return NULL; } dev_dbg(kdev->dev, "initializing %s channel %d from DMA %s\n", @@ -431,7 +431,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (config->direction != DMA_MEM_TO_DEV && config->direction != DMA_DEV_TO_MEM) { dev_err(kdev->dev, "bad direction\n"); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma instance */ @@ -443,7 +443,7 @@ void *knav_dma_open_channel(struct device *dev, const char *name, } if (!dma) { dev_err(kdev->dev, "No DMA instance with name %s\n", instance); - return (void *)-EINVAL; + return NULL; } /* Look for correct dma channel from dma instance */ @@ -463,14 +463,14 @@ void *knav_dma_open_channel(struct device *dev, const char *name, if (!chan) { dev_err(kdev->dev, "channel %d is not in DMA %s\n", chan_num, instance); - return (void *)-EINVAL; + return NULL; } if (atomic_read(&chan->ref_count) >= 1) { if (!check_config(chan, config)) { dev_err(kdev->dev, "channel %d config miss-match\n", chan_num); - return (void *)-EINVAL; + return NULL; } } From 327c20c21d80e0d87834b392d83ae73c955ad8ff Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 3 Nov 2025 08:38:17 -0800 Subject: [PATCH 0726/1075] netpoll: Fix deadlock in memory allocation under spinlock Fix a AA deadlock in refill_skbs() where memory allocation while holding skb_pool->lock can trigger a recursive lock acquisition attempt. The deadlock scenario occurs when the system is under severe memory pressure: 1. refill_skbs() acquires skb_pool->lock (spinlock) 2. alloc_skb() is called while holding the lock 3. Memory allocator fails and calls slab_out_of_memory() 4. This triggers printk() for the OOM warning 5. The console output path calls netpoll_send_udp() 6. netpoll_send_udp() attempts to acquire the same skb_pool->lock 7. Deadlock: the lock is already held by the same CPU Call stack: refill_skbs() spin_lock_irqsave(&skb_pool->lock) <- lock acquired __alloc_skb() kmem_cache_alloc_node_noprof() slab_out_of_memory() printk() console_flush_all() netpoll_send_udp() skb_dequeue() spin_lock_irqsave(&skb_pool->lock) <- deadlock attempt This bug was exposed by commit 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") which removed refill_skbs() from the critical path (where nested printk was being deferred), letting nested printk being called from inside refill_skbs() Refactor refill_skbs() to never allocate memory while holding the spinlock. Another possible solution to fix this problem is protecting the refill_skbs() from nested printks, basically calling printk_deferred_{enter,exit}() in refill_skbs(), then, any nested pr_warn() would be deferred. I prefer this approach, given I _think_ it might be a good idea to move the alloc_skb() from GFP_ATOMIC to GFP_KERNEL in the future, so, having the alloc_skb() outside of the lock will be necessary step. There is a possible TOCTOU issue when checking for the pool length, and queueing the new allocated skb, but, this is not an issue, given that an extra SKB in the pool is harmless and it will be eventually used. Signed-off-by: Breno Leitao Fixes: 248f6571fd4c51 ("netpoll: Optimize skb refilling on critical path") Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251103-fix_netpoll_aa-v4-1-4cfecdf6da7c@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 60a05d3b7c24..c85f740065fc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) From 5232334baec371a3c9d9192ba7d2da2d88a85333 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Mon, 29 Sep 2025 12:03:13 +0200 Subject: [PATCH 0727/1075] gpio: aggregator: restore the set_config operation Restore the set_config operation, as it was lost during the refactoring of the gpio-aggregator driver while creating the gpio forwarder library. Fixes: b31c68fd851e7 ("gpio: aggregator: handle runtime registration of gpio_desc in gpiochip_fwd") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202509281206.a7334ae8-lkp@intel.com Signed-off-by: Thomas Richard Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20250929-gpio-aggregator-fix-set-config-callback-v1-1-39046e1da609@bootlin.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aggregator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 37600faf4a4b..416f265d09d0 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -723,6 +723,7 @@ struct gpiochip_fwd *devm_gpiochip_fwd_alloc(struct device *dev, chip->get_multiple = gpio_fwd_get_multiple_locked; chip->set = gpio_fwd_set; chip->set_multiple = gpio_fwd_set_multiple_locked; + chip->set_config = gpio_fwd_set_config; chip->to_irq = gpio_fwd_to_irq; chip->base = -1; chip->ngpio = ngpios; From 487df8b698345dd5a91346335f05170ed5f29d4e Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 4 Nov 2025 10:53:57 +0100 Subject: [PATCH 0728/1075] drm/sched: Fix deadlock in drm_sched_entity_kill_jobs_cb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Mesa issue referenced below pointed out a possible deadlock: [ 1231.611031] Possible interrupt unsafe locking scenario: [ 1231.611033] CPU0 CPU1 [ 1231.611034] ---- ---- [ 1231.611035] lock(&xa->xa_lock#17); [ 1231.611038] local_irq_disable(); [ 1231.611039] lock(&fence->lock); [ 1231.611041] lock(&xa->xa_lock#17); [ 1231.611044] [ 1231.611045] lock(&fence->lock); [ 1231.611047] *** DEADLOCK *** In this example, CPU0 would be any function accessing job->dependencies through the xa_* functions that don't disable interrupts (eg: drm_sched_job_add_dependency(), drm_sched_entity_kill_jobs_cb()). CPU1 is executing drm_sched_entity_kill_jobs_cb() as a fence signalling callback so in an interrupt context. It will deadlock when trying to grab the xa_lock which is already held by CPU0. Replacing all xa_* usage by their xa_*_irq counterparts would fix this issue, but Christian pointed out another issue: dma_fence_signal takes fence.lock and so does dma_fence_add_callback. dma_fence_signal() // locks f1.lock -> drm_sched_entity_kill_jobs_cb() -> foreach dependencies -> dma_fence_add_callback() // locks f2.lock This will deadlock if f1 and f2 share the same spinlock. To fix both issues, the code iterating on dependencies and re-arming them is moved out to drm_sched_entity_kill_jobs_work(). Cc: stable@vger.kernel.org # v6.2+ Fixes: 2fdb8a8f07c2 ("drm/scheduler: rework entity flush, kill and fini") Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/13908 Reported-by: Mikhail Gavrilov Suggested-by: Christian König Reviewed-by: Christian König Signed-off-by: Pierre-Eric Pelloux-Prayer [phasta: commit message nits] Signed-off-by: Philipp Stanner Link: https://patch.msgid.link/20251104095358.15092-1-pierre-eric.pelloux-prayer@amd.com --- drivers/gpu/drm/scheduler/sched_entity.c | 34 +++++++++++++----------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index c8e949f4a568..fe174a4857be 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -173,26 +173,15 @@ int drm_sched_entity_error(struct drm_sched_entity *entity) } EXPORT_SYMBOL(drm_sched_entity_error); +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb); + static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) { struct drm_sched_job *job = container_of(wrk, typeof(*job), work); - - drm_sched_fence_scheduled(job->s_fence, NULL); - drm_sched_fence_finished(job->s_fence, -ESRCH); - WARN_ON(job->s_fence->parent); - job->sched->ops->free_job(job); -} - -/* Signal the scheduler finished fence when the entity in question is killed. */ -static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, - struct dma_fence_cb *cb) -{ - struct drm_sched_job *job = container_of(cb, struct drm_sched_job, - finish_cb); + struct dma_fence *f; unsigned long index; - dma_fence_put(f); - /* Wait for all dependencies to avoid data corruptions */ xa_for_each(&job->dependencies, index, f) { struct drm_sched_fence *s_fence = to_drm_sched_fence(f); @@ -220,6 +209,21 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, dma_fence_put(f); } + drm_sched_fence_scheduled(job->s_fence, NULL); + drm_sched_fence_finished(job->s_fence, -ESRCH); + WARN_ON(job->s_fence->parent); + job->sched->ops->free_job(job); +} + +/* Signal the scheduler finished fence when the entity in question is killed. */ +static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, + struct dma_fence_cb *cb) +{ + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, + finish_cb); + + dma_fence_put(f); + INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); schedule_work(&job->work); } From 636f4618b1cd96f6b5a2b8c7c4f665c8533ecf13 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 29 Oct 2025 01:28:28 +0800 Subject: [PATCH 0729/1075] regulator: fixed: fix GPIO descriptor leak on register failure In the commit referenced by the Fixes tag, devm_gpiod_get_optional() was replaced by manual GPIO management, relying on the regulator core to release the GPIO descriptor. However, this approach does not account for the error path: when regulator registration fails, the core never takes over the GPIO, resulting in a resource leak. Add gpiod_put() before returning on regulator registration failure. Fixes: 5e6f3ae5c13b ("regulator: fixed: Let core handle GPIO descriptor") Signed-off-by: Haotian Zhang Link: https://patch.msgid.link/20251028172828.625-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- drivers/regulator/fixed.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c index 1cb647ed70c6..a2d16e9abfb5 100644 --- a/drivers/regulator/fixed.c +++ b/drivers/regulator/fixed.c @@ -334,6 +334,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev) ret = dev_err_probe(&pdev->dev, PTR_ERR(drvdata->dev), "Failed to register regulator: %ld\n", PTR_ERR(drvdata->dev)); + gpiod_put(cfg.ena_gpiod); return ret; } From a50f7456f853ec3a6f07cbe1d16ad8a8b2501320 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 30 Oct 2025 14:05:27 +0000 Subject: [PATCH 0730/1075] dma-mapping: Allow use of DMA_BIT_MASK(64) in global scope Clang doesn't like that (1ULL<<(64)) overflows when initializing a global scope variable, even if that part of the ternary isn't used when n = 64. The same initialization can be done without warnings in function scopes, and GCC doesn't mind either way. The build failure that highlighted this was already fixed in a different way [1], which also has detailed links to the Clang issues. However it's not going to be long before the same thing happens again, so it's better to fix the root cause. Fix it by using GENMASK_ULL() which does exactly the same thing, is much more readable anyway, and doesn't have a shift that overflows. [1]: https://lore.kernel.org/all/20250918-mmp-pdma-simplify-dma-addressing-v1-1-5c2be2b85696@riscstar.com/ Signed-off-by: James Clark Reviewed-by: Nathan Chancellor Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251030-james-fix-dma_bit_mask-v1-1-ad1ce7cfab6e@linaro.org --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 8248ff9363ee..2ceda49c609f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -90,7 +90,7 @@ */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +#define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0) struct dma_iova_state { dma_addr_t addr; From 63b5aa01da0f38cdbd97d021477258e511631497 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 20:50:06 +0800 Subject: [PATCH 0731/1075] vfat: fix missing sb_min_blocksize() return value checks When emulating an nvme device on qemu with both logical_block_size and physical_block_size set to 8 KiB, but without format, a kernel panic was triggered during the early boot stage while attempting to mount a vfat filesystem. [95553.682035] EXT4-fs (nvme0n1): unable to set blocksize [95553.684326] EXT4-fs (nvme0n1): unable to set blocksize [95553.686501] EXT4-fs (nvme0n1): unable to set blocksize [95553.696448] ISOFS: unsupported/invalid hardware sector size 8192 [95553.697117] ------------[ cut here ]------------ [95553.697567] kernel BUG at fs/buffer.c:1582! [95553.697984] Oops: invalid opcode: 0000 [#1] SMP NOPTI [95553.698602] CPU: 0 UID: 0 PID: 7212 Comm: mount Kdump: loaded Not tainted 6.18.0-rc2+ #38 PREEMPT(voluntary) [95553.699511] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 [95553.700534] RIP: 0010:folio_alloc_buffers+0x1bb/0x1c0 [95553.701018] Code: 48 8b 15 e8 93 18 02 65 48 89 35 e0 93 18 02 48 83 c4 10 5b 41 5c 41 5d 41 5e 41 5f 5d 31 d2 31 c9 31 f6 31 ff c3 cc cc cc cc <0f> 0b 90 66 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f [95553.702648] RSP: 0018:ffffd1b0c676f990 EFLAGS: 00010246 [95553.703132] RAX: ffff8cfc4176d820 RBX: 0000000000508c48 RCX: 0000000000000001 [95553.703805] RDX: 0000000000002000 RSI: 0000000000000000 RDI: 0000000000000000 [95553.704481] RBP: ffffd1b0c676f9c8 R08: 0000000000000000 R09: 0000000000000000 [95553.705148] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000001 [95553.705816] R13: 0000000000002000 R14: fffff8bc8257e800 R15: 0000000000000000 [95553.706483] FS: 000072ee77315840(0000) GS:ffff8cfdd2c8d000(0000) knlGS:0000000000000000 [95553.707248] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [95553.707782] CR2: 00007d8f2a9e5a20 CR3: 0000000039d0c006 CR4: 0000000000772ef0 [95553.708439] PKRU: 55555554 [95553.708734] Call Trace: [95553.709015] [95553.709266] __getblk_slow+0xd2/0x230 [95553.709641] ? find_get_block_common+0x8b/0x530 [95553.710084] bdev_getblk+0x77/0xa0 [95553.710449] __bread_gfp+0x22/0x140 [95553.710810] fat_fill_super+0x23a/0xfc0 [95553.711216] ? __pfx_setup+0x10/0x10 [95553.711580] ? __pfx_vfat_fill_super+0x10/0x10 [95553.712014] vfat_fill_super+0x15/0x30 [95553.712401] get_tree_bdev_flags+0x141/0x1e0 [95553.712817] get_tree_bdev+0x10/0x20 [95553.713177] vfat_get_tree+0x15/0x20 [95553.713550] vfs_get_tree+0x2a/0x100 [95553.713910] vfs_cmd_create+0x62/0xf0 [95553.714273] __do_sys_fsconfig+0x4e7/0x660 [95553.714669] __x64_sys_fsconfig+0x20/0x40 [95553.715062] x64_sys_call+0x21ee/0x26a0 [95553.715453] do_syscall_64+0x80/0x670 [95553.715816] ? __fs_parse+0x65/0x1e0 [95553.716172] ? fat_parse_param+0x103/0x4b0 [95553.716587] ? vfs_parse_fs_param_source+0x21/0xa0 [95553.717034] ? __do_sys_fsconfig+0x3d9/0x660 [95553.717548] ? __x64_sys_fsconfig+0x20/0x40 [95553.717957] ? x64_sys_call+0x21ee/0x26a0 [95553.718360] ? do_syscall_64+0xb8/0x670 [95553.718734] ? __x64_sys_fsconfig+0x20/0x40 [95553.719141] ? x64_sys_call+0x21ee/0x26a0 [95553.719545] ? do_syscall_64+0xb8/0x670 [95553.719922] ? x64_sys_call+0x1405/0x26a0 [95553.720317] ? do_syscall_64+0xb8/0x670 [95553.720702] ? __x64_sys_close+0x3e/0x90 [95553.721080] ? x64_sys_call+0x1b5e/0x26a0 [95553.721478] ? do_syscall_64+0xb8/0x670 [95553.721841] ? irqentry_exit+0x43/0x50 [95553.722211] ? exc_page_fault+0x90/0x1b0 [95553.722681] entry_SYSCALL_64_after_hwframe+0x76/0x7e [95553.723166] RIP: 0033:0x72ee774f3afe [95553.723562] Code: 73 01 c3 48 8b 0d 0a 33 0f 00 f7 d8 64 89 01 48 83 c8 ff c3 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 49 89 ca b8 af 01 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d da 32 0f 00 f7 d8 64 89 01 48 [95553.725188] RSP: 002b:00007ffe97148978 EFLAGS: 00000246 ORIG_RAX: 00000000000001af [95553.725892] RAX: ffffffffffffffda RBX: 00005dcfe53d0080 RCX: 000072ee774f3afe [95553.726526] RDX: 0000000000000000 RSI: 0000000000000006 RDI: 0000000000000003 [95553.727176] RBP: 00007ffe97148ac0 R08: 0000000000000000 R09: 000072ee775e7ac0 [95553.727818] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [95553.728459] R13: 00005dcfe53d04b0 R14: 000072ee77670b00 R15: 00005dcfe53d1a28 [95553.729086] The panic occurs as follows: 1. logical_block_size is 8KiB, causing {struct super_block *sb}->s_blocksize is initialized to 0. vfat_fill_super - fat_fill_super - sb_min_blocksize - sb_set_blocksize //return 0 when size is 8KiB. 2. __bread_gfp is called with size == 0, causing folio_alloc_buffers() to compute an offset equal to folio_size(folio), which triggers a BUG_ON. fat_fill_super - sb_bread - __bread_gfp // size == {struct super_block *sb}->s_blocksize == 0 - bdev_getblk - __getblk_slow - grow_buffers - grow_dev_folio - folio_alloc_buffers // size == 0 - folio_set_bh //offset == folio_size(folio) and panic To fix this issue, add proper return value checks for sb_min_blocksize(). Cc: stable@vger.kernel.org # v6.15 Fixes: a64e5a596067bd ("bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()") Reviewed-by: Matthew Wilcox Reviewed-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: OGAWA Hirofumi Reviewed-by: Christoph Hellwig Signed-off-by: Yongpeng Yang Link: https://patch.msgid.link/20251104125009.2111925-2-yangyongpeng.storage@gmail.com Acked-by: OGAWA Hirofumi Signed-off-by: Christian Brauner --- fs/fat/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9648ed097816..9cfe20a3daaf 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1595,8 +1595,12 @@ int fat_fill_super(struct super_block *sb, struct fs_context *fc, setup(sb); /* flavour-specific stuff that needs options */ + error = -EINVAL; + if (!sb_min_blocksize(sb, 512)) { + fat_msg(sb, KERN_ERR, "unable to set blocksize"); + goto out_fail; + } error = -EIO; - sb_min_blocksize(sb, 512); bh = sb_bread(sb, 0); if (bh == NULL) { fat_msg(sb, KERN_ERR, "unable to read boot sector"); From f2c1f631630e01821fe4c3fdf6077bc7a8284f82 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 20:50:07 +0800 Subject: [PATCH 0732/1075] exfat: check return value of sb_min_blocksize in exfat_read_boot_sector sb_min_blocksize() may return 0. Check its return value to avoid accessing the filesystem super block when sb->s_blocksize is 0. Cc: stable@vger.kernel.org # v6.15 Fixes: 719c1e1829166d ("exfat: add super block operations") Reviewed-by: Christoph Hellwig Signed-off-by: Yongpeng Yang Link: https://patch.msgid.link/20251104125009.2111925-3-yangyongpeng.storage@gmail.com Signed-off-by: Christian Brauner --- fs/exfat/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 7f9592856bf7..74d451f732c7 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -433,7 +433,10 @@ static int exfat_read_boot_sector(struct super_block *sb) struct exfat_sb_info *sbi = EXFAT_SB(sb); /* set block size to read super block */ - sb_min_blocksize(sb, 512); + if (!sb_min_blocksize(sb, 512)) { + exfat_err(sb, "unable to set blocksize"); + return -EINVAL; + } /* read boot sector */ sbi->boot_bh = sb_bread(sb, 0); From e106e269c5cb38315eb0a0e7e38f71e9b20c8c66 Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 20:50:08 +0800 Subject: [PATCH 0733/1075] isofs: check the return value of sb_min_blocksize() in isofs_fill_super sb_min_blocksize() may return 0. Check its return value to avoid opt->blocksize and sb->s_blocksize is 0. Cc: stable@vger.kernel.org # v6.15 Fixes: 1b17a46c9243e9 ("isofs: convert isofs to use the new mount API") Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Yongpeng Yang Link: https://patch.msgid.link/20251104125009.2111925-4-yangyongpeng.storage@gmail.com Signed-off-by: Christian Brauner --- fs/isofs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 6f0e6b19383c..ad3143d4066b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -610,6 +610,11 @@ static int isofs_fill_super(struct super_block *s, struct fs_context *fc) goto out_freesbi; } opt->blocksize = sb_min_blocksize(s, opt->blocksize); + if (!opt->blocksize) { + printk(KERN_ERR + "ISOFS: unable to set blocksize\n"); + goto out_freesbi; + } sbi->s_high_sierra = 0; /* default is iso9660 */ sbi->s_session = opt->session; From 124af0868ec6929ba838fb76d25f00c06ba8fc0d Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 20:50:09 +0800 Subject: [PATCH 0734/1075] xfs: check the return value of sb_min_blocksize() in xfs_fs_fill_super sb_min_blocksize() may return 0. Check its return value to avoid the filesystem super block when sb->s_blocksize is 0. Cc: stable@vger.kernel.org # v6.15 Fixes: a64e5a596067bd ("bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()") Reviewed-by: Christoph Hellwig Signed-off-by: Yongpeng Yang Link: https://patch.msgid.link/20251104125009.2111925-5-yangyongpeng.storage@gmail.com Reviewed-by: Darrick J. Wong Signed-off-by: Christian Brauner --- fs/xfs/xfs_super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e85a156dc17d..fbb8009f1c0f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1662,7 +1662,10 @@ xfs_fs_fill_super( if (error) return error; - sb_min_blocksize(sb, BBSIZE); + if (!sb_min_blocksize(sb, BBSIZE)) { + xfs_err(mp, "unable to set blocksize"); + return -EINVAL; + } sb->s_xattr = xfs_xattr_handlers; sb->s_export_op = &xfs_export_operations; #ifdef CONFIG_XFS_QUOTA From c014021253d77cd89b2d8788ce522283d83fbd40 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Mon, 27 Oct 2025 03:46:47 -0700 Subject: [PATCH 0735/1075] virtio-fs: fix incorrect check for fsvq->kobj In virtio_fs_add_queues_sysfs(), the code incorrectly checks fs->mqs_kobj after calling kobject_create_and_add(). Change the check to fsvq->kobj (fs->mqs_kobj -> fsvq->kobj) to ensure the per-queue kobject is successfully created. Fixes: 87cbdc396a31 ("virtio_fs: add sysfs entries for queue information") Signed-off-by: Alok Tiwari Link: https://patch.msgid.link/20251027104658.1668537-1-alok.a.tiwari@oracle.com Reviewed-by: Stefan Hajnoczi Signed-off-by: Christian Brauner --- fs/fuse/virtio_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 6bc7c97b017d..b2f6486fe1d5 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -373,7 +373,7 @@ static int virtio_fs_add_queues_sysfs(struct virtio_fs *fs) sprintf(buff, "%d", i); fsvq->kobj = kobject_create_and_add(buff, fs->mqs_kobj); - if (!fs->mqs_kobj) { + if (!fsvq->kobj) { ret = -ENOMEM; goto out_del; } From 8637fa89e678422995301ddb20b74190dffcccee Mon Sep 17 00:00:00 2001 From: Yongpeng Yang Date: Tue, 4 Nov 2025 20:50:10 +0800 Subject: [PATCH 0736/1075] block: add __must_check attribute to sb_min_blocksize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When sb_min_blocksize() returns 0 and the return value is not checked, it may lead to a situation where sb->s_blocksize is 0 when accessing the filesystem super block. After commit a64e5a596067bd ("bdev: add back PAGE_SIZE block size validation for sb_set_blocksize()"), this becomes more likely to happen when the block device’s logical_block_size is larger than PAGE_SIZE and the filesystem is unformatted. Add the __must_check attribute to ensure callers always check the return value. Cc: stable@vger.kernel.org # v6.15 Suggested-by: Matthew Wilcox Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Yongpeng Yang Link: https://patch.msgid.link/20251104125009.2111925-6-yangyongpeng.storage@gmail.com Signed-off-by: Christian Brauner --- block/bdev.c | 2 +- include/linux/fs.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/block/bdev.c b/block/bdev.c index 810707cca970..638f0cd458ae 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -231,7 +231,7 @@ int sb_set_blocksize(struct super_block *sb, int size) EXPORT_SYMBOL(sb_set_blocksize); -int sb_min_blocksize(struct super_block *sb, int size) +int __must_check sb_min_blocksize(struct super_block *sb, int size) { int minsize = bdev_logical_block_size(sb->s_bdev); if (size < minsize) diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444..3ea98c6cce81 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3423,8 +3423,8 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); extern void inode_add_lru(struct inode *inode); -extern int sb_set_blocksize(struct super_block *, int); -extern int sb_min_blocksize(struct super_block *, int); +int sb_set_blocksize(struct super_block *sb, int size); +int __must_check sb_min_blocksize(struct super_block *sb, int size); int generic_file_mmap(struct file *, struct vm_area_struct *); int generic_file_mmap_prepare(struct vm_area_desc *desc); From 90f601b497d76f40fa66795c3ecf625b6aced9fd Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 5 Nov 2025 02:29:23 +0000 Subject: [PATCH 0737/1075] binfmt_misc: restore write access before closing files opened by open_exec() bm_register_write() opens an executable file using open_exec(), which internally calls do_open_execat() and denies write access on the file to avoid modification while it is being executed. However, when an error occurs, bm_register_write() closes the file using filp_close() directly. This does not restore the write permission, which may cause subsequent write operations on the same file to fail. Fix this by calling exe_file_allow_write_access() before filp_close() to restore the write permission properly. Fixes: e7850f4d844e ("binfmt_misc: fix possible deadlock in bm_register_write") Signed-off-by: Zilin Guan Link: https://patch.msgid.link/20251105022923.1813587-1-zilin@seu.edu.cn Signed-off-by: Christian Brauner --- fs/binfmt_misc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a839f960cd4a..a8b1d79e4af0 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -837,8 +837,10 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, inode_unlock(d_inode(root)); if (err) { - if (f) + if (f) { + exe_file_allow_write_access(f); filp_close(f, NULL); + } kfree(e); return err; } From 3cd2018e15b3d66d2187d92867e265f45ad79e6f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 2 Nov 2025 20:09:21 +0100 Subject: [PATCH 0738/1075] spi: Try to get ACPI GPIO IRQ earlier Since commit d24cfee7f63d ("spi: Fix acpi deferred irq probe"), the acpi_dev_gpio_irq_get() call gets delayed till spi_probe() is called on the SPI device. If there is no driver for the SPI device then the move to spi_probe() results in acpi_dev_gpio_irq_get() never getting called. This may cause problems by leaving the GPIO pin floating because this call is responsible for setting up the GPIO pin direction and/or bias according to the values from the ACPI tables. Re-add the removed acpi_dev_gpio_irq_get() in acpi_register_spi_device() to ensure the GPIO pin is always correctly setup, while keeping the acpi_dev_gpio_irq_get() call added to spi_probe() to deal with -EPROBE_DEFER returns caused by the GPIO controller not having a driver yet. Link: https://bbs.archlinux.org/viewtopic.php?id=302348 Fixes: d24cfee7f63d ("spi: Fix acpi deferred irq probe") Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Link: https://patch.msgid.link/20251102190921.30068-1-hansg@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 2e0647a06890..8588e8562220 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2851,6 +2851,16 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, acpi_set_modalias(adev, acpi_device_hid(adev), spi->modalias, sizeof(spi->modalias)); + /* + * This gets re-tried in spi_probe() for -EPROBE_DEFER handling in case + * the GPIO controller does not have a driver yet. This needs to be done + * here too, because this call sets the GPIO direction and/or bias. + * Setting these needs to be done even if there is no driver, in which + * case spi_probe() will never get called. + */ + if (spi->irq < 0) + spi->irq = acpi_dev_gpio_irq_get(adev, 0); + acpi_device_set_enumerated(adev); adev->power.flags.ignore_parent = true; From c74619e7602e88a0239cd4999571dd31081e9adf Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Mon, 3 Nov 2025 09:24:36 +0100 Subject: [PATCH 0739/1075] wifi: mac80211_hwsim: Limit destroy_on_close radio removal to netgroup hwsim radios marked destroy_on_close are removed when the Netlink socket that created them is closed. As the portid is not unique across network namespaces, closing a socket in one namespace may remove radios in another if it has the destroy_on_close flag set. Instead of matching the network namespace, match the netgroup of the radio to limit radio removal to those that have been created by the closing Netlink socket. The netgroup of a radio identifies the network namespace it was created in, and matching on it removes a destroy_on_close radio even if it has been moved to another namespace. Fixes: 100cb9ff40e0 ("mac80211_hwsim: Allow managing radios from non-initial namespaces") Signed-off-by: Martin Willi Link: https://patch.msgid.link/20251103082436.30483-1-martin@strongswan.org Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 9f856042a67a..d28bf18d57ec 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -6698,14 +6698,15 @@ static struct genl_family hwsim_genl_family __ro_after_init = { .n_mcgrps = ARRAY_SIZE(hwsim_mcgrps), }; -static void remove_user_radios(u32 portid) +static void remove_user_radios(u32 portid, int netgroup) { struct mac80211_hwsim_data *entry, *tmp; LIST_HEAD(list); spin_lock_bh(&hwsim_radio_lock); list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) { - if (entry->destroy_on_close && entry->portid == portid) { + if (entry->destroy_on_close && entry->portid == portid && + entry->netgroup == netgroup) { list_move(&entry->list, &list); rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht, hwsim_rht_params); @@ -6730,7 +6731,7 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - remove_user_radios(notify->portid); + remove_user_radios(notify->portid, hwsim_net_get_netgroup(notify->net)); if (notify->portid == hwsim_net_get_wmediumd(notify->net)) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" From 8d54eacd82a0623a963e0c150ad3b02970638b0d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 4 Nov 2025 16:12:00 -0800 Subject: [PATCH 0740/1075] xfs: fix delalloc write failures in software-provided atomic writes With the 20 Oct 2025 release of fstests, generic/521 fails for me on regular (aka non-block-atomic-writes) storage: QA output created by 521 dowrite: write: Input/output error LOG DUMP (8553 total operations): 1( 1 mod 256): SKIPPED (no operation) 2( 2 mod 256): WRITE 0x7e000 thru 0x8dfff (0x10000 bytes) HOLE 3( 3 mod 256): READ 0x69000 thru 0x79fff (0x11000 bytes) 4( 4 mod 256): FALLOC 0x53c38 thru 0x5e853 (0xac1b bytes) INTERIOR 5( 5 mod 256): COPY 0x55000 thru 0x59fff (0x5000 bytes) to 0x25000 thru 0x29fff 6( 6 mod 256): WRITE 0x74000 thru 0x88fff (0x15000 bytes) 7( 7 mod 256): ZERO 0xedb1 thru 0x11693 (0x28e3 bytes) with a warning in dmesg from iomap about XFS trying to give it a delalloc mapping for a directio write. Fix the software atomic write iomap_begin code to convert the reservation into a written mapping. This doesn't fix the data corruption problems reported by generic/760, but it's a start. Cc: stable@vger.kernel.org # v6.16 Fixes: bd1d2c21d5d249 ("xfs: add xfs_atomic_write_cow_iomap_begin()") Signed-off-by: Darrick J. Wong Reviewed-by: John Garry Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_iomap.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d3f6e3e42a11..788bfdce608a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1130,7 +1130,7 @@ xfs_atomic_write_cow_iomap_begin( return -EAGAIN; trace_xfs_iomap_atomic_write_cow(ip, offset, length); - +retry: xfs_ilock(ip, XFS_ILOCK_EXCL); if (!ip->i_cowfp) { @@ -1141,6 +1141,8 @@ xfs_atomic_write_cow_iomap_begin( if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) cmap.br_startoff = end_fsb; if (cmap.br_startoff <= offset_fsb) { + if (isnullstartblock(cmap.br_startblock)) + goto convert_delay; xfs_trim_extent(&cmap, offset_fsb, count_fsb); goto found; } @@ -1169,8 +1171,10 @@ xfs_atomic_write_cow_iomap_begin( if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) cmap.br_startoff = end_fsb; if (cmap.br_startoff <= offset_fsb) { - xfs_trim_extent(&cmap, offset_fsb, count_fsb); xfs_trans_cancel(tp); + if (isnullstartblock(cmap.br_startblock)) + goto convert_delay; + xfs_trim_extent(&cmap, offset_fsb, count_fsb); goto found; } @@ -1210,6 +1214,19 @@ xfs_atomic_write_cow_iomap_begin( xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); +convert_delay: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + error = xfs_bmapi_convert_delalloc(ip, XFS_COW_FORK, offset, iomap, + NULL); + if (error) + return error; + + /* + * Try the lookup again, because the delalloc conversion might have + * turned the COW mapping into unwritten, but we need it to be in + * written state. + */ + goto retry; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; From 8d7bba1e8314013ecc817a91624104ceb9352ddc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 4 Nov 2025 16:15:38 -0800 Subject: [PATCH 0741/1075] xfs: fix various problems in xfs_atomic_write_cow_iomap_begin I think there are several things wrong with this function: A) xfs_bmapi_write can return a much larger unwritten mapping than what the caller asked for. We convert part of that range to written, but return the entire written mapping to iomap even though that's inaccurate. B) The arguments to xfs_reflink_convert_cow_locked are wrong -- an unwritten mapping could be *smaller* than the write range (or even the hole range). In this case, we convert too much file range to written state because we then return a smaller mapping to iomap. C) It doesn't handle delalloc mappings. This I covered in the patch that I already sent to the list. D) Reassigning count_fsb to handle the hole means that if the second cmap lookup attempt succeeds (due to racing with someone else) we trim the mapping more than is strictly necessary. The changing meaning of count_fsb makes this harder to notice. E) The tracepoint is kinda wrong because @length is mutated. That makes it harder to chase the data flows through this function because you can't just grep on the pos/bytecount strings. F) We don't actually check that the br_state = XFS_EXT_NORM assignment is accurate, i.e that the cow fork actually contains a written mapping for the range we're interested in G) Somewhat inadequate documentation of why we need to xfs_trim_extent so aggressively in this function. H) Not sure why xfs_iomap_end_fsb is used here, the vfs already clamped the write range to s_maxbytes. Fix these issues, and then the atomic writes regressions in generic/760, generic/617, generic/091, generic/263, and generic/521 all go away for me. Cc: stable@vger.kernel.org # v6.16 Fixes: bd1d2c21d5d249 ("xfs: add xfs_atomic_write_cow_iomap_begin()") Signed-off-by: Darrick J. Wong Reviewed-by: John Garry Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_iomap.c | 61 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 788bfdce608a..490e12cb99be 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1091,6 +1091,29 @@ const struct iomap_ops xfs_zoned_direct_write_iomap_ops = { }; #endif /* CONFIG_XFS_RT */ +#ifdef DEBUG +static void +xfs_check_atomic_cow_conversion( + struct xfs_inode *ip, + xfs_fileoff_t offset_fsb, + xfs_filblks_t count_fsb, + const struct xfs_bmbt_irec *cmap) +{ + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec cmap2 = { }; + + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap2)) + xfs_trim_extent(&cmap2, offset_fsb, count_fsb); + + ASSERT(cmap2.br_startoff == cmap->br_startoff); + ASSERT(cmap2.br_blockcount == cmap->br_blockcount); + ASSERT(cmap2.br_startblock == cmap->br_startblock); + ASSERT(cmap2.br_state == cmap->br_state); +} +#else +# define xfs_check_atomic_cow_conversion(...) ((void)0) +#endif + static int xfs_atomic_write_cow_iomap_begin( struct inode *inode, @@ -1102,9 +1125,10 @@ xfs_atomic_write_cow_iomap_begin( { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); - xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); - xfs_filblks_t count_fsb = end_fsb - offset_fsb; + const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + const xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); + const xfs_filblks_t count_fsb = end_fsb - offset_fsb; + xfs_filblks_t hole_count_fsb; int nmaps = 1; xfs_filblks_t resaligned; struct xfs_bmbt_irec cmap; @@ -1143,14 +1167,20 @@ xfs_atomic_write_cow_iomap_begin( if (cmap.br_startoff <= offset_fsb) { if (isnullstartblock(cmap.br_startblock)) goto convert_delay; + + /* + * cmap could extend outside the write range due to previous + * speculative preallocations. We must trim cmap to the write + * range because the cow fork treats written mappings to mean + * "write in progress". + */ xfs_trim_extent(&cmap, offset_fsb, count_fsb); goto found; } - end_fsb = cmap.br_startoff; - count_fsb = end_fsb - offset_fsb; + hole_count_fsb = cmap.br_startoff - offset_fsb; - resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, + resaligned = xfs_aligned_fsb_count(offset_fsb, hole_count_fsb, xfs_get_cowextsz_hint(ip)); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -1186,7 +1216,7 @@ xfs_atomic_write_cow_iomap_begin( * atomic writes to that same range will be aligned (and don't require * this COW-based method). */ - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, + error = xfs_bmapi_write(tp, ip, offset_fsb, hole_count_fsb, XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC | XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps); if (error) { @@ -1199,17 +1229,26 @@ xfs_atomic_write_cow_iomap_begin( if (error) goto out_unlock; + /* + * cmap could map more blocks than the range we passed into bmapi_write + * because of EXTSZALIGN or adjacent pre-existing unwritten mappings + * that were merged. Trim cmap to the original write range so that we + * don't convert more than we were asked to do for this write. + */ + xfs_trim_extent(&cmap, offset_fsb, count_fsb); + found: if (cmap.br_state != XFS_EXT_NORM) { - error = xfs_reflink_convert_cow_locked(ip, offset_fsb, - count_fsb); + error = xfs_reflink_convert_cow_locked(ip, cmap.br_startoff, + cmap.br_blockcount); if (error) goto out_unlock; cmap.br_state = XFS_EXT_NORM; + xfs_check_atomic_cow_conversion(ip, offset_fsb, count_fsb, + &cmap); } - length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); - trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); + trace_xfs_iomap_found(ip, offset, length, XFS_COW_FORK, &cmap); seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); From f5714a3c1a5658251360603231efe1bee21f9c2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Nov 2025 08:50:53 -0500 Subject: [PATCH 0742/1075] xfs: fix a rtgroup leak when xfs_init_zone fails Drop the rtgrop reference when xfs_init_zone fails for a conventional device. Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 040402240807..fa76fa4ba0f9 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -1249,8 +1249,10 @@ xfs_mount_zones( while ((rtg = xfs_rtgroup_next(mp, rtg))) { error = xfs_init_zone(&iz, rtg, NULL); - if (error) + if (error) { + xfs_rtgroup_rele(rtg); goto out_free_zone_info; + } } } From 21ab5179aafa2ded7f3851bfe7e043f8a3b6199d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 3 Nov 2025 05:15:13 -0500 Subject: [PATCH 0743/1075] xfs: fix zone selection in xfs_select_open_zone_mru xfs_select_open_zone_mru needs to pass XFS_ZONE_ALLOC_OK to xfs_try_use_zone because we only want to tightly pack into zones of the same or a compatible temperature instead of any available zone. This got broken in commit 0301dae732a5 ("xfs: refactor hint based zone allocation"), which failed to update this particular caller when switching to an enum. xfs/638 sometimes, but not reliably fails due to this change. Fixes: 0301dae732a5 ("xfs: refactor hint based zone allocation") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index fa76fa4ba0f9..ef7a931ebde5 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -615,7 +615,7 @@ xfs_select_open_zone_mru( lockdep_assert_held(&zi->zi_open_zones_lock); list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry) - if (xfs_try_use_zone(zi, file_hint, oz, false)) + if (xfs_try_use_zone(zi, file_hint, oz, XFS_ZONE_ALLOC_OK)) return oz; cond_resched_lock(&zi->zi_open_zones_lock); From 1fd5367391bf0eeb09e624c4ab45121b54eaab96 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 5 Nov 2025 15:47:01 +0000 Subject: [PATCH 0744/1075] io_uring: fix types for region size calulation ->nr_pages is int, it needs type extension before calculating the region size. Fixes: a90558b36ccee ("io_uring/memmap: helper for pinning region pages") Signed-off-by: Pavel Begunkov [axboe: style fixup] Signed-off-by: Jens Axboe --- io_uring/memmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/memmap.c b/io_uring/memmap.c index 2e99dffddfc5..add03ca75cb9 100644 --- a/io_uring/memmap.c +++ b/io_uring/memmap.c @@ -135,7 +135,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg) { - unsigned long size = mr->nr_pages << PAGE_SHIFT; + unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT; struct page **pages; int nr_pages; From 94f54924b96d3565c6b559294b3401b5496c21ac Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 12 Sep 2025 15:43:21 +0900 Subject: [PATCH 0745/1075] btrfs: zoned: fix conventional zone capacity calculation When a block group contains both conventional zone and sequential zone, the capacity of the block group is wrongly set to the block group's full length. The capacity should be calculated in btrfs_load_block_group_* using the last allocation offset. Fixes: 568220fa9657 ("btrfs: zoned: support RAID0/1/10 on top of raid stripe tree") CC: stable@vger.kernel.org # v6.12+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 838149fa60ce..8f006dff8893 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1317,6 +1317,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, if (!btrfs_dev_is_sequential(device, info->physical)) { up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; + info->capacity = device->zone_info->zone_size; return 0; } @@ -1683,8 +1684,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); if (num_conventional > 0) { - /* Zone capacity is always zone size in emulation */ - cache->zone_capacity = cache->length; ret = calculate_alloc_pointer(cache, &last_alloc, new); if (ret) { btrfs_err(fs_info, @@ -1693,6 +1692,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } else if (map->num_stripes == num_conventional) { cache->alloc_offset = last_alloc; + cache->zone_capacity = cache->length; set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); goto out; } From 6a1ab50135ce829b834b448ce49867b5210a1641 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 16 Sep 2025 11:46:11 +0900 Subject: [PATCH 0746/1075] btrfs: zoned: fix stripe width calculation The stripe offset calculation in the zoned code for raid0 and raid10 wrongly uses map->stripe_size to calculate it. In fact, map->stripe_size is the size of the device extent composing the block group, which always is the zone_size on the zoned setup. Fix it by using BTRFS_STRIPE_LEN and BTRFS_STRIPE_LEN_SHIFT. Also, optimize the calculation a bit by doing the common calculation only once. Fixes: c0d90a79e8e6 ("btrfs: zoned: fix alloc_offset calculation for partly conventional block groups") CC: stable@vger.kernel.org # 6.17+ Signed-off-by: Naohiro Aota Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 56 ++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 8f006dff8893..b622c73fe30f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1523,6 +1523,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1530,28 +1532,26 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, map->num_stripes); - div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > i) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == i) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if (test_bit(0, active) != test_bit(i, active)) { @@ -1575,6 +1575,8 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, u64 last_alloc) { struct btrfs_fs_info *fs_info = bg->fs_info; + u64 stripe_nr = 0, stripe_offset = 0; + u32 stripe_index = 0; if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", @@ -1582,6 +1584,14 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, return -EINVAL; } + if (last_alloc) { + u32 factor = map->num_stripes / map->sub_stripes; + + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + } + for (int i = 0; i < map->num_stripes; i++) { if (zone_info[i].alloc_offset == WP_MISSING_DEV) continue; @@ -1595,26 +1605,12 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg, } if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { - u64 stripe_nr, full_stripe_nr; - u64 stripe_offset; - int stripe_index; - - stripe_nr = div64_u64(last_alloc, map->stripe_size); - stripe_offset = stripe_nr * map->stripe_size; - full_stripe_nr = div_u64(stripe_nr, - map->num_stripes / map->sub_stripes); - div_u64_rem(stripe_nr, - (map->num_stripes / map->sub_stripes), - &stripe_index); - - zone_info[i].alloc_offset = - full_stripe_nr * map->stripe_size; + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); if (stripe_index > (i / map->sub_stripes)) - zone_info[i].alloc_offset += map->stripe_size; + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; else if (stripe_index == (i / map->sub_stripes)) - zone_info[i].alloc_offset += - (last_alloc - stripe_offset); + zone_info[i].alloc_offset += stripe_offset; } if ((i % map->sub_stripes) == 0) { From bfe3d755ef7cec71aac6ecda34a107624735aac7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 29 Oct 2025 13:05:32 +0000 Subject: [PATCH 0747/1075] btrfs: do not update last_log_commit when logging inode due to a new name When logging that a new name exists, we skip updating the inode's last_log_commit field to prevent a later explicit fsync against the inode from doing nothing (as updating last_log_commit makes btrfs_inode_in_log() return true). We are detecting, at btrfs_log_inode(), that logging a new name is happening by checking the logging mode is not LOG_INODE_EXISTS, but that is not enough because we may log parent directories when logging a new name of a file in LOG_INODE_ALL mode - we need to check that the logging_new_name field of the log context too. An example scenario where this results in an explicit fsync against a directory not persisting changes to the directory is the following: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/foo $ sync $ mkdir /mnt/dir # Write some data to our file and fsync it. $ xfs_io -c "pwrite -S 0xab 0 64K" -c "fsync" /mnt/foo # Add a new link to our file. Since the file was logged before, we # update it in the log tree by calling btrfs_log_new_name(). $ ln /mnt/foo /mnt/dir/bar # fsync the root directory - we expect it to persist the dentry for # the new directory "dir". $ xfs_io -c "fsync" /mnt After mounting the fs the entry for directory "dir" does not exists, despite the explicit fsync on the root directory. Here's why this happens: 1) When we fsync the file we log the inode, so that it's present in the log tree; 2) When adding the new link we enter btrfs_log_new_name(), and since the inode is in the log tree we proceed to updating the inode in the log tree; 3) We first set the inode's last_unlink_trans to the current transaction (early in btrfs_log_new_name()); 4) We then eventually enter btrfs_log_inode_parent(), and after logging the file's inode, we call btrfs_log_all_parents() because the inode's last_unlink_trans matches the current transaction's ID (updated in the previous step); 5) So btrfs_log_all_parents() logs the root directory by calling btrfs_log_inode() for the root's inode with a log mode of LOG_INODE_ALL so that new dentries are logged; 6) At btrfs_log_inode(), because the log mode is LOG_INODE_ALL, we update root inode's last_log_commit to the last transaction that changed the inode (->last_sub_trans field of the inode), which corresponds to the current transaction's ID; 7) Then later when user space explicitly calls fsync against the root directory, we enter btrfs_sync_file(), which calls skip_inode_logging() and that returns true, since its call to btrfs_inode_in_log() returns true and there are no ordered extents (it's a directory, never has ordered extents). This results in btrfs_sync_file() returning without syncing the log or committing the current transaction, so all the updates we did when logging the new name, including logging the root directory, are not persisted. So fix this by but updating the inode's last_log_commit if we are sure we are not logging a new name (if ctx->logging_new_name is false). A test case for fstests will follow soon. Reported-by: Vyacheslav Kovalevsky Link: https://lore.kernel.org/linux-btrfs/03c5d7ec-5b3d-49d1-95bc-8970a7f82d87@gmail.com/ Fixes: 130341be7ffa ("btrfs: always update the logged transaction when logging new names") CC: stable@vger.kernel.org # 6.1+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 00a59fb79167..98599644986f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -7122,7 +7122,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, * a power failure unless the log was synced as part of an fsync * against any other unrelated inode. */ - if (inode_only != LOG_INODE_EXISTS) + if (!ctx->logging_new_name && inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); From 5fea61aa1ca70c4b3738eebad9ce2d7e7938ebbd Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 5 Nov 2025 03:53:21 +0000 Subject: [PATCH 0748/1075] btrfs: scrub: put bio after errors in scrub_raid56_parity_stripe() scrub_raid56_parity_stripe() allocates a bio with bio_alloc(), but fails to release it on some error paths, leading to a potential memory leak. Add the missing bio_put() calls to properly drop the bio reference in those error cases. Fixes: 1009254bf22a3 ("btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Signed-off-by: Zilin Guan Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/scrub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 651b11884f82..ba20d9286a34 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2203,6 +2203,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, &length, &bioc, NULL, NULL); if (ret < 0) { + bio_put(bio); btrfs_put_bioc(bioc); btrfs_bio_counter_dec(fs_info); goto out; @@ -2212,6 +2213,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx, btrfs_put_bioc(bioc); if (!rbio) { ret = -ENOMEM; + bio_put(bio); btrfs_bio_counter_dec(fs_info); goto out; } From c367af440e03eba7beb0c9f3fe540f9bcb69134a Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Wed, 5 Nov 2025 02:37:22 +0000 Subject: [PATCH 0749/1075] btrfs: release root after error in data_reloc_print_warning_inode() data_reloc_print_warning_inode() calls btrfs_get_fs_root() to obtain local_root, but fails to release its reference when paths_from_inode() returns an error. This causes a potential memory leak. Add a missing btrfs_put_root() call in the error path to properly decrease the reference count of local_root. Fixes: b9a9a85059cde ("btrfs: output affected files when relocation fails") CC: stable@vger.kernel.org # 6.6+ Reviewed-by: Qu Wenruo Signed-off-by: Zilin Guan Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b95175116ea3..d097532fd85b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -177,8 +177,10 @@ static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, return ret; } ret = paths_from_inode(inum, ipath); - if (ret < 0) + if (ret < 0) { + btrfs_put_root(local_root); goto err; + } /* * We deliberately ignore the bit ipath might have been too small to From afb47765f9235181fddc61c8633b5a8cfae29fd2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 4 Nov 2025 14:11:49 -0400 Subject: [PATCH 0750/1075] iommufd: Make vfio_compat's unmap succeed if the range is already empty iommufd returns ENOENT when attempting to unmap a range that is already empty, while vfio type1 returns success. Fix vfio_compat to match. Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility") Link: https://patch.msgid.link/r/0-v1-76be45eff0be+5d-iommufd_unmap_compat_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Alex Mastro Reported-by: Alex Mastro Closes: https://lore.kernel.org/r/aP0S5ZF9l3sWkJ1G@devgpu012.nha5.facebook.com Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.c | 12 +++--------- drivers/iommu/iommufd/ioas.c | 4 ++++ tools/testing/selftests/iommu/iommufd.c | 2 ++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index c0360c450880..75d60f2ad900 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -707,7 +707,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, struct iopt_area *area; unsigned long unmapped_bytes = 0; unsigned int tries = 0; - int rc = -ENOENT; + /* If there are no mapped entries then success */ + int rc = 0; /* * The domains_rwsem must be held in read mode any time any area->pages @@ -777,8 +778,6 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, down_write(&iopt->iova_rwsem); } - if (unmapped_bytes) - rc = 0; out_unlock_iova: up_write(&iopt->iova_rwsem); @@ -815,13 +814,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) { - int rc; - - rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); /* If the IOVAs are empty then unmap all succeeds */ - if (rc == -ENOENT) - return 0; - return rc; + return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); } /* The caller must always free all the nodes in the allowed_iova rb_root. */ diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 1542c5fd10a8..459a7c516915 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -367,6 +367,10 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) &unmapped); if (rc) goto out_put; + if (!unmapped) { + rc = -ENOENT; + goto out_put; + } } cmd->length = unmapped; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 3eebf5e3b974..bb4d33dde3c8 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map) ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + /* Unmap of empty is success */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); /* UNMAP_FLAG_ALL requires 0 iova/size */ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); From ebe755605082eddff80eafe0c50915b1366ee98f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 14 Oct 2025 12:45:12 -0500 Subject: [PATCH 0751/1075] drm/nouveau: set DMA mask before creating the flush page Set the DMA mask before calling nvkm_device_ctor(), so that when the flush page is created in nvkm_fb_ctor(), the allocation will not fail if the page is outside of DMA address space, which can easily happen if IOMMU is disable. In such situations, you will get an error like this: nouveau 0000:65:00.0: DMA addr 0x0000000107c56000+4096 overflow (mask ffffffff, bus limit 0). Commit 38f5359354d4 ("rm/nouveau/pci: set streaming DMA mask early") set the mask after calling nvkm_device_ctor(), but back then there was no flush page being created, which might explain why the mask wasn't set earlier. Flush page allocation was added in commit 5728d064190e ("drm/nouveau/fb: handle sysmem flush page from common code"). nvkm_fb_ctor() calls alloc_page(), which can allocate a page anywhere in system memory, but then calls dma_map_page() on that page. But since the DMA mask is still set to 32, the map can fail if the page is allocated above 4GB. This is easy to reproduce on systems with a lot of memory and IOMMU disabled. An alternative approach would be to force the allocation of the flush page to low memory, by specifying __GFP_DMA32. However, this would always allocate the page in low memory, even though the hardware can access high memory. Signed-off-by: Timur Tabi Reviewed-by: Dave Airlie Signed-off-by: Dave Airlie Link: https://patch.msgid.link/20251014174512.3172102-1-ttabi@nvidia.com --- .../gpu/drm/nouveau/nvkm/engine/device/pci.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 8f0261a0d618..7cc5a7499583 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1695,6 +1695,18 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, *pdevice = &pdev->device; pdev->pdev = pci_dev; + /* Set DMA mask based on capabilities reported by the MMU subdev. */ + if (pdev->device.mmu && !pdev->device.pci->agp.bridge) + bits = pdev->device.mmu->dma_bits; + else + bits = 32; + + ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); + if (ret && bits != 32) { + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + pdev->device.mmu->dma_bits = 32; + } + ret = nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev, pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE : pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ? @@ -1708,17 +1720,5 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, if (ret) return ret; - /* Set DMA mask based on capabilities reported by the MMU subdev. */ - if (pdev->device.mmu && !pdev->device.pci->agp.bridge) - bits = pdev->device.mmu->dma_bits; - else - bits = 32; - - ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); - if (ret && bits != 32) { - dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - pdev->device.mmu->dma_bits = 32; - } - return 0; } From 1cf52a0d4ba079fb354fa1339f5fb34142228dae Mon Sep 17 00:00:00 2001 From: James Jones Date: Thu, 30 Oct 2025 11:11:52 -0700 Subject: [PATCH 0752/1075] drm: define NVIDIA DRM format modifiers for GB20x The layout of bits within the individual tiles (referred to as sectors in the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro) changed for 8 and 16-bit surfaces starting in Blackwell 2 GPUs (With the exception of GB10). To denote the difference, extend the sector field in the parametric format modifier definition used to generate modifier values for NVIDIA hardware. Without this change, it would be impossible to differentiate the two layouts based on modifiers, and as a result software could attempt to share surfaces directly between pre-GB20x and GB20x cards, resulting in corruption when the surface was accessed on one of the GPUs after being populated with content by the other. Of note: This change causes the DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D() macro to evaluate its "s" parameter twice, with the side effects that entails. I surveyed all usage of the modifier in the kernel and Mesa code, and that does not appear to be problematic in any current usage, but I thought it was worth calling out. Fixes: 6cc6e08d4542 ("drm/nouveau/kms: add support for GB20x") Signed-off-by: James Jones Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251030181153.1208-2-jajones@nvidia.com --- include/uapi/drm/drm_fourcc.h | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index ea91aa8afde9..e527b24bd824 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -979,14 +979,20 @@ extern "C" { * 2 = Gob Height 8, Turing+ Page Kind mapping * 3 = Reserved for future use. * - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further - * bit remapping step that occurs at an even lower level than the - * page kind and block linear swizzles. This causes the layout of - * surfaces mapped in those SOC's GPUs to be incompatible with the - * equivalent mapping on other GPUs in the same system. + * 22:22 s Sector layout. There is a further bit remapping step that occurs + * 26:27 at an even lower level than the page kind and block linear + * swizzles. This causes the bit arrangement of surfaces in memory + * to differ subtly, and prevents direct sharing of surfaces between + * GPUs with different layouts. * - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. - * 1 = Desktop GPU and Tegra Xavier+ Layout + * 0 = Tegra K1 - Tegra Parker/TX2 Layout + * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout + * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout + * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout + * 4 = Reserved for future use. + * 5 = Reserved for future use. + * 6 = Reserved for future use. + * 7 = Reserved for future use. * * 25:23 c Lossless Framebuffer Compression type. * @@ -1001,7 +1007,7 @@ extern "C" { * 6 = Reserved for future use * 7 = Reserved for future use * - * 55:25 - Reserved for future use. Must be zero. + * 55:28 - Reserved for future use. Must be zero. */ #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ fourcc_mod_code(NVIDIA, (0x10 | \ @@ -1009,6 +1015,7 @@ extern "C" { (((k) & 0xff) << 12) | \ (((g) & 0x3) << 20) | \ (((s) & 0x1) << 22) | \ + (((s) & 0x6) << 25) | \ (((c) & 0x7) << 23))) /* To grandfather in prior block linear format modifiers to the above layout, From 664ce10246ba00746af94a08b7fbda8ccaacd930 Mon Sep 17 00:00:00 2001 From: James Jones Date: Thu, 30 Oct 2025 11:11:53 -0700 Subject: [PATCH 0753/1075] drm/nouveau: Advertise correct modifiers on GB20x 8 and 16 bit formats use a different layout on GB20x than they did on prior chips. Add the corresponding DRM format modifiers to the list of modifiers supported by the display engine on such chips, and filter the supported modifiers for each format based on its bytes per pixel in nv50_plane_format_mod_supported(). Note this logic will need to be updated when GB10 support is added, since it is a GB20x chip that uses the pre-GB20x sector layout for all formats. Fixes: 6cc6e08d4542 ("drm/nouveau/kms: add support for GB20x") Signed-off-by: James Jones Reviewed-by: Faith Ekstrand Signed-off-by: Dave Airlie Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251030181153.1208-3-jajones@nvidia.com --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 4 ++- drivers/gpu/drm/nouveau/dispnv50/disp.h | 1 + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 24 +++++++++++++-- drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c | 33 +++++++++++++++++++++ 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index e97e39abf3a2..12b1dba8e05d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2867,7 +2867,9 @@ nv50_display_create(struct drm_device *dev) } /* Assign the correct format modifiers */ - if (disp->disp->object.oclass >= TU102_DISP) + if (disp->disp->object.oclass >= GB202_DISP) + nouveau_display(dev)->format_modifiers = wndwca7e_modifiers; + else if (disp->disp->object.oclass >= TU102_DISP) nouveau_display(dev)->format_modifiers = wndwc57e_modifiers; else if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h index 15f9242b72ac..5d998f0319dc 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h @@ -104,4 +104,5 @@ struct nouveau_encoder *nv50_real_outp(struct drm_encoder *encoder); extern const u64 disp50xx_modifiers[]; extern const u64 disp90xx_modifiers[]; extern const u64 wndwc57e_modifiers[]; +extern const u64 wndwca7e_modifiers[]; #endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e2c55f4b9c5a..ef9e410babbf 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -786,13 +786,14 @@ nv50_wndw_destroy(struct drm_plane *plane) } /* This function assumes the format has already been validated against the plane - * and the modifier was validated against the device-wides modifier list at FB + * and the modifier was validated against the device-wide modifier list at FB * creation time. */ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, u32 format, u64 modifier) { struct nouveau_drm *drm = nouveau_drm(plane->dev); + const struct drm_format_info *info = drm_format_info(format); uint8_t i; /* All chipsets can display all formats in linear layout */ @@ -800,13 +801,32 @@ static bool nv50_plane_format_mod_supported(struct drm_plane *plane, return true; if (drm->client.device.info.chipset < 0xc0) { - const struct drm_format_info *info = drm_format_info(format); const uint8_t kind = (modifier >> 12) & 0xff; if (!format) return false; for (i = 0; i < info->num_planes; i++) if ((info->cpp[i] != 4) && kind != 0x70) return false; + } else if (drm->client.device.info.chipset >= 0x1b2) { + const uint8_t slayout = ((modifier >> 22) & 0x1) | + ((modifier >> 25) & 0x6); + + if (!format) + return false; + + /* + * Note in practice this implies only formats where cpp is equal + * for each plane, or >= 4 for all planes, are supported. + */ + for (i = 0; i < info->num_planes; i++) { + if (((info->cpp[i] == 2) && slayout != 3) || + ((info->cpp[i] == 1) && slayout != 2) || + ((info->cpp[i] >= 4) && slayout != 1)) + return false; + + /* 24-bit not supported. It has yet another layout */ + WARN_ON(info->cpp[i] == 3); + } } return true; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c index 0d8e9a9d1a57..2cec8cfbd546 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c @@ -179,6 +179,39 @@ wndwca7e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw) return 0; } +/**************************************************************** + * Log2(block height) ----------------------------+ * + * Page Kind ----------------------------------+ | * + * Gob Height/Page Kind Generation ------+ | | * + * Sector layout -------+ | | | * + * Compression ------+ | | | | */ +const u64 wndwca7e_modifiers[] = { /* | | | | | */ + /* 4cpp+ modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 5), + /* 1cpp/8bpp modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 5), + /* 2cpp/16bpp modifiers */ + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 0), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 1), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 2), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 3), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 4), + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 5), + /* All formats support linear */ + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + static const struct nv50_wndw_func wndwca7e = { .acquire = wndwc37e_acquire, From 6d08340d1e354787d6c65a8c3cdd4d41ffb8a5ed Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:02 +0100 Subject: [PATCH 0754/1075] Revert "perf/x86: Always store regs->ip in perf_callchain_kernel()" This reverts commit 83f44ae0f8afcc9da659799db8693f74847e66b3. Currently we store initial stacktrace entry twice for non-HW ot_regs, which means callers that fail perf_hw_regs(regs) condition in perf_callchain_kernel. It's easy to reproduce this bpftrace: # bpftrace -e 'tracepoint:sched:sched_process_exec { print(kstack()); }' Attaching 1 probe... bprm_execve+1767 bprm_execve+1767 do_execveat_common.isra.0+425 __x64_sys_execve+56 do_syscall_64+133 entry_SYSCALL_64_after_hwframe+118 When perf_callchain_kernel calls unwind_start with first_frame, AFAICS we do not skip regs->ip, but it's added as part of the unwind process. Hence reverting the extra perf_callchain_store for non-hw regs leg. I was not able to bisect this, so I'm not really sure why this was needed in v5.2 and why it's not working anymore, but I could see double entries as far as v5.10. I did the test for both ORC and framepointer unwind with and without the this fix and except for the initial entry the stacktraces are the same. Acked-by: Song Liu Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-2-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- arch/x86/events/core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 745caa6c15a3..fa6c47b50989 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2789,13 +2789,13 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re return; } - if (perf_callchain_store(entry, regs->ip)) - return; - - if (perf_hw_regs(regs)) + if (perf_hw_regs(regs)) { + if (perf_callchain_store(entry, regs->ip)) + return; unwind_start(&state, current, regs, NULL); - else + } else { unwind_start(&state, current, NULL, (void *)regs->sp); + } for (; !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); From 20a0bc10272fa17a44fc857c31574a8306f60d20 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:03 +0100 Subject: [PATCH 0755/1075] x86/fgraph,bpf: Fix stack ORC unwind from kprobe_multi return probe Currently we don't get stack trace via ORC unwinder on top of fgraph exit handler. We can see that when generating stacktrace from kretprobe_multi bpf program which is based on fprobe/fgraph. The reason is that the ORC unwind code won't get pass the return_to_handler callback installed by fgraph return probe machinery. Solving this by creating stack frame in return_to_handler expected by ftrace_graph_ret_addr function to recover original return address and continue with the unwind. Also updating the pt_regs data with cs/flags/rsp which are needed for successful stack retrieval from ebpf bpf_get_stackid helper. - in get_perf_callchain we check user_mode(regs) so CS has to be set - in perf_callchain_kernel we call perf_hw_regs(regs), so EFLAGS/FIXED has to be unset Acked-by: Masami Hiramatsu (Google) Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-3-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- arch/x86/include/asm/ftrace.h | 5 +++++ arch/x86/kernel/ftrace_64.S | 8 +++++++- include/linux/ftrace.h | 10 +++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 93156ac4ffe0..b08c95872eed 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -56,6 +56,11 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) return &arch_ftrace_regs(fregs)->regs; } +#define arch_ftrace_partial_regs(regs) do { \ + regs->flags &= ~X86_EFLAGS_FIXED; \ + regs->cs = __KERNEL_CS; \ +} while (0) + #define arch_ftrace_fill_perf_regs(fregs, _regs) do { \ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 367da3638167..823dbdd0eb41 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -354,12 +354,17 @@ SYM_CODE_START(return_to_handler) UNWIND_HINT_UNDEFINED ANNOTATE_NOENDBR + /* Restore return_to_handler value that got eaten by previous ret instruction. */ + subq $8, %rsp + UNWIND_HINT_FUNC + /* Save ftrace_regs for function exit context */ subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rdx, RDX(%rsp) movq %rbp, RBP(%rsp) + movq %rsp, RSP(%rsp) movq %rsp, %rdi call ftrace_return_to_handler @@ -368,7 +373,8 @@ SYM_CODE_START(return_to_handler) movq RDX(%rsp), %rdx movq RAX(%rsp), %rax - addq $(FRAME_SIZE), %rsp + addq $(FRAME_SIZE) + 8, %rsp + /* * Jump back to the old return address. This cannot be JMP_NOSPEC rdi * since IBT would demand that contain ENDBR, which simply isn't so for diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7ded7df6e9b5..07f8c309e432 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -193,6 +193,10 @@ static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs *fregs #if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \ defined(CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS) +#ifndef arch_ftrace_partial_regs +#define arch_ftrace_partial_regs(regs) do {} while (0) +#endif + static __always_inline struct pt_regs * ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) { @@ -202,7 +206,11 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs) * Since arch_ftrace_get_regs() will check some members and may return * NULL, we can not use it. */ - return &arch_ftrace_regs(fregs)->regs; + regs = &arch_ftrace_regs(fregs)->regs; + + /* Allow arch specific updates to regs. */ + arch_ftrace_partial_regs(regs); + return regs; } #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || CONFIG_HAVE_FTRACE_REGS_HAVING_PT_REGS */ From c9e208fa93cd66f8077ee15df0728e62b105a687 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:04 +0100 Subject: [PATCH 0756/1075] selftests/bpf: Add stacktrace ips test for kprobe_multi/kretprobe_multi Adding test that attaches kprobe/kretprobe multi and verifies the ORC stacktrace matches expected functions. Adding bpf_testmod_stacktrace_test function to bpf_testmod kernel module which is called through several functions so we get reliable call path for stacktrace. The test is only for ORC unwinder to keep it simple. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- .../selftests/bpf/prog_tests/stacktrace_ips.c | 104 ++++++++++++++++++ .../selftests/bpf/progs/stacktrace_ips.c | 41 +++++++ .../selftests/bpf/test_kmods/bpf_testmod.c | 26 +++++ 3 files changed, 171 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_ips.c diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c new file mode 100644 index 000000000000..6fca459ba550 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "stacktrace_ips.skel.h" + +#ifdef __x86_64__ +static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...) +{ + __u64 ips[PERF_MAX_STACK_DEPTH]; + struct ksyms *ksyms = NULL; + int i, err = 0; + va_list args; + + /* sorted by addr */ + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) + return -1; + + /* unlikely, but... */ + if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max")) + return -1; + + err = bpf_map_lookup_elem(fd, &key, ips); + if (err) + goto out; + + /* + * Compare all symbols provided via arguments with stacktrace ips, + * and their related symbol addresses.t + */ + va_start(args, cnt); + + for (i = 0; i < cnt; i++) { + unsigned long val; + struct ksym *ksym; + + val = va_arg(args, unsigned long); + ksym = ksym_search_local(ksyms, ips[i]); + if (!ASSERT_OK_PTR(ksym, "ksym_search_local")) + break; + ASSERT_EQ(ksym->addr, val, "stack_cmp"); + } + + va_end(args); + +out: + free_kallsyms_local(ksyms); + return err; +} + +static void test_stacktrace_ips_kprobe_multi(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts( + skel->progs.kprobe_multi_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void __test_stacktrace_ips(void) +{ + if (test__start_subtest("kprobe_multi")) + test_stacktrace_ips_kprobe_multi(false); + if (test__start_subtest("kretprobe_multi")) + test_stacktrace_ips_kprobe_multi(true); +} +#else +static void __test_stacktrace_ips(void) +{ + test__skip(); +} +#endif + +void test_stacktrace_ips(void) +{ + __test_stacktrace_ips(); +} diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c new file mode 100644 index 000000000000..e2eb30945c1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include +#include +#include + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, stack_trace_t); +} stackmap SEC(".maps"); + +extern bool CONFIG_UNWINDER_ORC __kconfig __weak; + +/* + * This function is here to have CONFIG_UNWINDER_ORC + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_UNWINDER_ORC ? 0 : 1; +} + +__u32 stack_key; + +SEC("kprobe.multi") +int kprobe_multi_test(struct pt_regs *ctx) +{ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 8074bc5f6f20..ed0a4721d8fd 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, return a + (long)b + c + d + (long)e + f + g + h + i + j + k; } +noinline void bpf_testmod_stacktrace_test(void) +{ + /* used for stacktrace test as attach function */ + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_3(void) +{ + bpf_testmod_stacktrace_test(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_2(void) +{ + bpf_testmod_stacktrace_test_3(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_1(void) +{ + bpf_testmod_stacktrace_test_2(); + asm volatile (""); +} + int bpf_testmod_fentry_ok; noinline ssize_t @@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, 21, 22, 23, 24, 25, 26) != 231) goto out; + bpf_testmod_stacktrace_test_1(); + bpf_testmod_fentry_ok = 1; out: return -EIO; /* always fail */ From 3490d29964bdd524366d266b655112cb549c7460 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:05 +0100 Subject: [PATCH 0757/1075] selftests/bpf: Add stacktrace ips test for raw_tp Adding test that verifies we get expected initial 2 entries from stacktrace for rawtp probe via ORC unwind. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- .../selftests/bpf/prog_tests/stacktrace_ips.c | 46 +++++++++++++++++++ .../selftests/bpf/progs/stacktrace_ips.c | 8 ++++ 2 files changed, 54 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c index 6fca459ba550..c9efdd2a5b18 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -84,12 +84,58 @@ static void test_stacktrace_ips_kprobe_multi(bool retprobe) stacktrace_ips__destroy(skel); } +static void test_stacktrace_ips_raw_tp(void) +{ + __u32 info_len = sizeof(struct bpf_prog_info); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_info info = {}; + struct stacktrace_ips *skel; + __u64 bpf_prog_ksym = 0; + int err; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.rawtp_test = bpf_program__attach_raw_tracepoint( + skel->progs.rawtp_test, + "bpf_testmod_test_read"); + if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint")) + goto cleanup; + + /* get bpf program address */ + info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym); + info.nr_jited_ksyms = 1; + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test), + &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2, + bpf_prog_ksym, + ksym_get_addr("bpf_trace_run2")); + +cleanup: + stacktrace_ips__destroy(skel); +} + static void __test_stacktrace_ips(void) { if (test__start_subtest("kprobe_multi")) test_stacktrace_ips_kprobe_multi(false); if (test__start_subtest("kretprobe_multi")) test_stacktrace_ips_kprobe_multi(true); + if (test__start_subtest("raw_tp")) + test_stacktrace_ips_raw_tp(); } #else static void __test_stacktrace_ips(void) diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c index e2eb30945c1b..a96c8150d7f5 100644 --- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -38,4 +38,12 @@ int kprobe_multi_test(struct pt_regs *ctx) return 0; } +SEC("raw_tp/bpf_testmod_test_read") +int rawtp_test(void *ctx) +{ + /* Skip ebpf program entry in the stack. */ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + char _license[] SEC("license") = "GPL"; From b1d16f7c0063b7209fd3251ce40c77d37b477b83 Mon Sep 17 00:00:00 2001 From: Michal Swiatkowski Date: Tue, 4 Nov 2025 09:23:31 -0800 Subject: [PATCH 0758/1075] libie: depend on DEBUG_FS when building LIBIE_FWLOG LIBIE_FWLOG is unusable without DEBUG_FS. Mark it in Kconfig. Fix build error on ixgbe when DEBUG_FS is not set. To not add another layer of #if IS_ENABLED(LIBIE_FWLOG) in ixgbe fwlog code define debugfs dentry even when DEBUG_FS isn't enabled. In this case the dummy functions of LIBIE_FWLOG will be used, so not initialized dentry isn't a problem. Fixes: 641585bc978e ("ixgbe: fwlog support for e610") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/lkml/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Signed-off-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Aleksandr Loktionov Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Link: https://patch.msgid.link/20251104172333.752445-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/Kconfig | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 -- include/linux/net/intel/libie/fwlog.h | 12 ++++++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index a563a94e2780..122ee23497e6 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,7 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select MDIO select NET_DEVLINK select PLDMFW @@ -298,7 +298,7 @@ config ICE select DIMLIB select LIBIE select LIBIE_ADMINQ - select LIBIE_FWLOG + select LIBIE_FWLOG if DEBUG_FS select NET_DEVLINK select PACKING select PLDMFW diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 14d275270123..dce4936708eb 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -821,9 +821,7 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ -#ifdef CONFIG_DEBUG_FS struct dentry *ixgbe_dbg_adapter; -#endif /*CONFIG_DEBUG_FS*/ u8 default_up; /* Bitmask indicating in use pools */ diff --git a/include/linux/net/intel/libie/fwlog.h b/include/linux/net/intel/libie/fwlog.h index 36b13fabca9e..7273c78c826b 100644 --- a/include/linux/net/intel/libie/fwlog.h +++ b/include/linux/net/intel/libie/fwlog.h @@ -78,8 +78,20 @@ struct libie_fwlog { ); }; +#if IS_ENABLED(CONFIG_LIBIE_FWLOG) int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api); void libie_fwlog_deinit(struct libie_fwlog *fwlog); void libie_fwlog_reregister(struct libie_fwlog *fwlog); void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len); +#else +static inline int libie_fwlog_init(struct libie_fwlog *fwlog, + struct libie_fwlog_api *api) +{ + return -EOPNOTSUPP; +} +static inline void libie_fwlog_deinit(struct libie_fwlog *fwlog) { } +static inline void libie_fwlog_reregister(struct libie_fwlog *fwlog) { } +static inline void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, + u16 len) { } +#endif /* CONFIG_LIBIE_FWLOG */ #endif /* _LIBIE_FWLOG_H_ */ From d917c217b612971ea05ae1582e8740b747e0e7e8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 4 Nov 2025 16:34:35 +0100 Subject: [PATCH 0759/1075] net: gro_cells: Reduce lock scope in gro_cell_poll One GRO-cell device's NAPI callback can nest into the GRO-cell of another device if the underlying device is also using GRO-cell. This is the case for IPsec over vxlan. These two GRO-cells are separate devices. From lockdep's point of view it is the same because each device is sharing the same lock class and so it reports a possible deadlock assuming one device is nesting into itself. Hold the bh_lock only while accessing gro_cell::napi_skbs in gro_cell_poll(). This reduces the locking scope and avoids acquiring the same lock class multiple times. Fixes: 25718fdcbdd2 ("net: gro_cells: Use nested-BH locking for gro_cell") Reported-by: Gal Pressman Closes: https://lore.kernel.org/all/66664116-edb8-48dc-ad72-d5223696dd19@nvidia.com/ Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251104153435.ty88xDQt@linutronix.de Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index fd57b845de33..a725d21159a6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -60,9 +60,10 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) struct sk_buff *skb; int work_done = 0; - __local_lock_nested_bh(&cell->bh_lock); while (work_done < budget) { + __local_lock_nested_bh(&cell->bh_lock); skb = __skb_dequeue(&cell->napi_skbs); + __local_unlock_nested_bh(&cell->bh_lock); if (!skb) break; napi_gro_receive(napi, skb); @@ -71,7 +72,6 @@ static int gro_cell_poll(struct napi_struct *napi, int budget) if (work_done < budget) napi_complete_done(napi, work_done); - __local_unlock_nested_bh(&cell->bh_lock); return work_done; } From d1c94bc5b90c21b65469d30d4a6bc8ed715c1bfe Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Tue, 4 Nov 2025 16:15:36 +0200 Subject: [PATCH 0760/1075] net/mlx5e: Fix return value in case of module EEPROM read error mlx5e_get_module_eeprom_by_page() has weird error handling. First, it is treating -EINVAL as a special case, but it is unclear why. Second, it tries to fail "gracefully" by returning the number of bytes read even in case of an error. This results in wrongly returning success (0 return value) if the error occurs before any bytes were read. Simplify the error handling by returning an error when such occurs. This also aligns with the error handling we have in mlx5e_get_module_eeprom() for the old API. This fixes the following case where the query fails, but userspace ethtool wrongly treats it as success and dumps an output: # ethtool -m eth2 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 netlink warning: mlx5_core: Query module eeprom by page failed, read 0 bytes, err -5 Offset Values ------ ------ 0x0000: 00 00 00 00 05 00 04 00 00 00 00 00 05 00 05 00 0x0010: 00 00 00 00 05 00 06 00 50 00 00 00 67 65 20 66 0x0020: 61 69 6c 65 64 2c 20 72 65 61 64 20 30 20 62 79 0x0030: 74 65 73 2c 20 65 72 72 20 2d 35 00 14 00 03 00 0x0040: 08 00 01 00 03 00 00 00 08 00 02 00 1a 00 00 00 0x0050: 14 00 04 00 08 00 01 00 04 00 00 00 08 00 02 00 0x0060: 0e 00 00 00 14 00 05 00 08 00 01 00 05 00 00 00 0x0070: 08 00 02 00 1a 00 00 00 14 00 06 00 08 00 01 00 Fixes: e109d2b204da ("net/mlx5: Implement get_module_eeprom_by_page()") Signed-off-by: Gal Pressman Reviewed-by: Alex Lazar Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762265736-1028868-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 53e5ae252eac..893e1380a7c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -2125,14 +2125,12 @@ static int mlx5e_get_module_eeprom_by_page(struct net_device *netdev, if (!size_read) return i; - if (size_read == -EINVAL) - return -EINVAL; if (size_read < 0) { NL_SET_ERR_MSG_FMT_MOD( extack, "Query module eeprom by page failed, read %u bytes, err %d", i, size_read); - return i; + return size_read; } i += size_read; From ae4789affd1e181ae46e72e2b5fbe2d6d7b6616a Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 4 Nov 2025 16:14:15 +0530 Subject: [PATCH 0761/1075] net: ti: icssg-prueth: Fix fdb hash size configuration The ICSSG driver does the initial FDB configuration which includes setting the control registers. Other run time management like learning is managed by the PRU's. The default FDB hash size used by the firmware is 512 slots, which is currently missing in the current driver. Update the driver FDB config to include FDB hash size as well. Please refer trm [1] 6.4.14.12.17 section on how the FDB config register gets configured. From the table 6-1404, there is a reset field for FDB_HAS_SIZE which is 4, meaning 1024 slots. Currently the driver is not updating this reset value from 4(1024 slots) to 3(512 slots). This patch fixes this by updating the reset value to 512 slots. [1]: https://www.ti.com/lit/pdf/spruim2 Fixes: abd5576b9c57f ("net: ti: icssg-prueth: Add support for ICSSG switch firmware") Signed-off-by: Meghana Malladi Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251104104415.3110537-1-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_config.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index da53eb04b0a4..3f8237c17d09 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -66,6 +66,9 @@ #define FDB_GEN_CFG1 0x60 #define SMEM_VLAN_OFFSET 8 #define SMEM_VLAN_OFFSET_MASK GENMASK(25, 8) +#define FDB_HASH_SIZE_MASK GENMASK(6, 3) +#define FDB_HASH_SIZE_SHIFT 3 +#define FDB_HASH_SIZE 3 #define FDB_GEN_CFG2 0x64 #define FDB_VLAN_EN BIT(6) @@ -463,6 +466,8 @@ void icssg_init_emac_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, (FDB_PRU0_EN | FDB_PRU1_EN | FDB_HOST_EN)); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + @@ -484,6 +489,8 @@ void icssg_init_fw_offload_mode(struct prueth *prueth) /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); + regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, FDB_HASH_SIZE_MASK, + FDB_HASH_SIZE << FDB_HASH_SIZE_SHIFT); /* Set enable VLAN aware mode, and FDBs for all PRUs */ regmap_write(prueth->miig_rt, FDB_GEN_CFG2, FDB_EN_ALL); prueth->vlan_tbl = (struct prueth_vlan_tbl __force *)(prueth->shram.va + From 665a7e13c220bbde55531a24bd5524320648df10 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:33 +0200 Subject: [PATCH 0762/1075] net/mlx5e: SHAMPO, Fix header mapping for 64K pages HW-GRO is broken on mlx5 for 64K page sizes. The patch in the fixes tag didn't take into account larger page sizes when doing an align down of max_ksm_entries. For 64K page size, max_ksm_entries is 0 which will skip mapping header pages via WQE UMR. This breaks header-data split and will result in the following syndrome: mlx5_core 0000:00:08.0 eth2: Error cqe on cqn 0x4c9, ci 0x0, qn 0x1133, opcode 0xe, syndrome 0x4, vendor syndrome 0x32 00000000: 00 00 00 00 04 4a 00 00 00 00 00 00 20 00 93 32 00000010: 55 00 00 00 fb cc 00 00 00 00 00 00 07 18 00 00 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 4a 00000030: 00 00 3b c7 93 01 32 04 00 00 00 00 00 00 bf e0 mlx5_core 0000:00:08.0 eth2: ERR CQE on RQ: 0x1133 Furthermore, the function that fills in WQE UMRs for the headers (mlx5e_build_shampo_hd_umr()) only supports mapping page sizes that fit in a single UMR WQE. This patch goes back to the old non-aligned max_ksm_entries value and it changes mlx5e_build_shampo_hd_umr() to support mapping a large page over multiple UMR WQEs. This means that mlx5e_build_shampo_hd_umr() can now leave a page only partially mapped. The caller, mlx5e_alloc_rx_hd_mpwqe(), ensures that there are enough UMR WQEs to cover complete pages by working on ksm_entries that are multiples of MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. Fixes: 8a0ee54027b1 ("net/mlx5e: SHAMPO, Simplify UMR allocation for headers") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 36 +++++++++---------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 26621a2972ec..0c031954ca30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -671,7 +671,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u16 pi, header_offset, err, wqe_bbs; u32 lkey = rq->mdev->mlx5e_res.hw_objs.mkey; struct mlx5e_umr_wqe *umr_wqe; - int headroom, i = 0; + int headroom, i; headroom = rq->buff.headroom; wqe_bbs = MLX5E_KSM_UMR_WQEBBS(ksm_entries); @@ -679,25 +679,24 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); build_ksm_umr(sq, umr_wqe, shampo->mkey_be, index, ksm_entries); - WARN_ON_ONCE(ksm_entries & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)); - while (i < ksm_entries) { - struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + for (i = 0; i < ksm_entries; i++, index++) { + struct mlx5e_frag_page *frag_page; u64 addr; - err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); - if (unlikely(err)) - goto err_unmap; + frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); + header_offset = mlx5e_shampo_hd_offset(index); + if (!header_offset) { + err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, + frag_page); + if (err) + goto err_unmap; + } addr = page_pool_get_dma_addr_netmem(frag_page->netmem); - - for (int j = 0; j < MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; j++) { - header_offset = mlx5e_shampo_hd_offset(index++); - - umr_wqe->inline_ksms[i++] = (struct mlx5_ksm) { - .key = cpu_to_be32(lkey), - .va = cpu_to_be64(addr + header_offset + headroom), - }; - } + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(addr + header_offset + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -713,7 +712,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, return 0; err_unmap: - while (--i) { + while (--i >= 0) { --index; header_offset = mlx5e_shampo_hd_offset(index); if (!header_offset) { @@ -735,8 +734,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) struct mlx5e_icosq *sq = rq->icosq; int i, err, max_ksm_entries, len; - max_ksm_entries = ALIGN_DOWN(MLX5E_MAX_KSM_PER_WQE(rq->mdev), - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); From bacd8d80181ebe34b599a39aa26bf73a44c91e55 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:34 +0200 Subject: [PATCH 0763/1075] net/mlx5e: SHAMPO, Fix skb size check for 64K pages mlx5e_hw_gro_skb_has_enough_space() uses a formula to check if there is enough space in the skb frags to store more data. This formula is incorrect for 64K page sizes and it triggers early GRO session termination because the first fragment will blow up beyond GRO_LEGACY_MAX_SIZE. This patch adds a special case for page sizes >= GRO_LEGACY_MAX_SIZE (64K) which uses the skb->len instead. Within this context, the check is safe from fragment overflow because the hardware will continuously fill the data up to the reservation size of 64K and the driver will coalesce all data from the same page to the same fragment. This means that the data will span one fragment or at most two for such a large page size. It is expected that the if statement will be optimized out as the check is done with constants. Fixes: 92552d3abd32 ("net/mlx5e: HW_GRO cqe handler implementation") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0c031954ca30..f2a06752ce37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2354,7 +2354,10 @@ mlx5e_hw_gro_skb_has_enough_space(struct sk_buff *skb, u16 data_bcnt) { int nr_frags = skb_shinfo(skb)->nr_frags; - return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; + if (PAGE_SIZE >= GRO_LEGACY_MAX_SIZE) + return skb->len + data_bcnt <= GRO_LEGACY_MAX_SIZE; + else + return PAGE_SIZE * nr_frags + data_bcnt <= GRO_LEGACY_MAX_SIZE; } static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) From d8a7ed9586c7579a99e9e2d90988c9eceeee61ff Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Tue, 4 Nov 2025 08:48:35 +0200 Subject: [PATCH 0764/1075] net/mlx5e: SHAMPO, Fix header formulas for higher MTUs and 64K pages The MLX5E_SHAMPO_WQ_HEADER_PER_PAGE and MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE macros are used directly in several places under the assumption that there will always be more headers per WQE than headers per page. However, this assumption doesn't hold for 64K page sizes and higher MTUs (> 4K). This can be first observed during header page allocation: ksm_entries will become 0 during alignment to MLX5E_SHAMPO_WQ_HEADER_PER_PAGE. This patch introduces 2 additional members to the mlx5e_shampo_hd struct which are meant to be used instead of the macrose mentioned above. When the number of headers per WQE goes below MLX5E_SHAMPO_WQ_HEADER_PER_PAGE, clamp the number of headers per page and expand the header size accordingly so that the headers for one WQE cover a full page. All the formulas are adapted to use these two new members. Fixes: 945ca432bfd0 ("net/mlx5e: SHAMPO, Drop info array") Signed-off-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1762238915-1027590-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 33 +++++++++++-------- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 14e3207b14e7..a163f81f07c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,7 +634,10 @@ struct mlx5e_dma_info { struct mlx5e_shampo_hd { struct mlx5e_frag_page *pages; u32 hd_per_wq; + u32 hd_per_page; u16 hd_per_wqe; + u8 log_hd_per_page; + u8 log_hd_entry_size; unsigned long *bitmap; u16 pi; u16 ci; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9c46511e7b43..6023bbbf3f39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -791,8 +791,9 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, int node) { void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq); + u8 log_hd_per_page, log_hd_entry_size; + u16 hd_per_wq, hd_per_wqe; u32 hd_pool_size; - u16 hd_per_wq; int wq_size; int err; @@ -815,11 +816,24 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, if (err) goto err_umr_mkey; - rq->mpwqe.shampo->hd_per_wqe = - mlx5e_shampo_hd_per_wqe(mdev, params, rqp); + hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rqp); wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); - hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / - MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; + + BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + if (hd_per_wqe >= MLX5E_SHAMPO_WQ_HEADER_PER_PAGE) { + log_hd_per_page = MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE; + log_hd_entry_size = MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + } else { + log_hd_per_page = order_base_2(hd_per_wqe); + log_hd_entry_size = order_base_2(PAGE_SIZE / hd_per_wqe); + } + + rq->mpwqe.shampo->hd_per_wqe = hd_per_wqe; + rq->mpwqe.shampo->hd_per_page = BIT(log_hd_per_page); + rq->mpwqe.shampo->log_hd_per_page = log_hd_per_page; + rq->mpwqe.shampo->log_hd_entry_size = log_hd_entry_size; + + hd_pool_size = (hd_per_wqe * wq_size) >> log_hd_per_page; if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f2a06752ce37..687cf123211d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -648,17 +648,20 @@ static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, umr_wqe->hdr.uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } -static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, int header_index) +static struct mlx5e_frag_page *mlx5e_shampo_hd_to_frag_page(struct mlx5e_rq *rq, + int header_index) { - BUILD_BUG_ON(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE > PAGE_SHIFT); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - return &rq->mpwqe.shampo->pages[header_index >> MLX5E_SHAMPO_LOG_WQ_HEADER_PER_PAGE]; + return &shampo->pages[header_index >> shampo->log_hd_per_page]; } -static u64 mlx5e_shampo_hd_offset(int header_index) +static u64 mlx5e_shampo_hd_offset(struct mlx5e_rq *rq, int header_index) { - return (header_index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << - MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u32 hd_per_page = shampo->hd_per_page; + + return (header_index & (hd_per_page - 1)) << shampo->log_hd_entry_size; } static void mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index); @@ -684,7 +687,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, u64 addr; frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { err = mlx5e_page_alloc_fragmented(rq->hd_page_pool, frag_page); @@ -714,7 +717,7 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, err_unmap: while (--i >= 0) { --index; - header_offset = mlx5e_shampo_hd_offset(index); + header_offset = mlx5e_shampo_hd_offset(rq, index); if (!header_offset) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, index); @@ -738,7 +741,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); + ksm_entries = ALIGN_DOWN(ksm_entries, shampo->hd_per_page); if (!ksm_entries) return 0; @@ -856,7 +859,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) { + if (((header_index + 1) & (shampo->hd_per_page - 1)) == 0) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); mlx5e_page_release_fragmented(rq->hd_page_pool, frag_page); @@ -1223,9 +1226,10 @@ static unsigned int mlx5e_lro_update_hdr(struct sk_buff *skb, static void *mlx5e_shampo_get_packet_hd(struct mlx5e_rq *rq, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index) + rq->buff.headroom; + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + void *addr = netmem_address(frag_page->netmem); - return netmem_address(frag_page->netmem) + head_offset; + return addr + head_offset + rq->buff.headroom; } static void mlx5e_shampo_update_ipv4_udp_hdr(struct mlx5e_rq *rq, struct iphdr *ipv4) @@ -2265,7 +2269,8 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { struct mlx5e_frag_page *frag_page = mlx5e_shampo_hd_to_frag_page(rq, header_index); - u16 head_offset = mlx5e_shampo_hd_offset(header_index); + u16 head_offset = mlx5e_shampo_hd_offset(rq, header_index); + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 head_size = cqe->shampo.header_size; u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb = NULL; @@ -2281,7 +2286,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, data = hdr + rx_headroom; frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + head_size); - if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) { + if (likely(frag_size <= BIT(shampo->log_hd_entry_size))) { /* build SKB around header */ dma_sync_single_range_for_cpu(rq->pdev, dma_addr, 0, frag_size, rq->buff.map_dir); net_prefetchw(hdr); From a04ea57aae375bdda1cb57034d8bcbb351e1f973 Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Tue, 4 Nov 2025 14:23:21 +0800 Subject: [PATCH 0765/1075] net: libwx: fix device bus LAN ID The device bus LAN ID was obtained from PCI_FUNC(), but when a PF port is passthrough to a virtual machine, the function number may not match the actual port index on the device. This could cause the driver to perform operations such as LAN reset on the wrong port. Fix this by reading the LAN ID from port status register. Fixes: a34b3e6ed8fb ("net: txgbe: Store PCI info") Cc: stable@vger.kernel.org Signed-off-by: Jiawen Wu Reviewed-by: Simon Horman Link: https://patch.msgid.link/B60A670C1F52CB8E+20251104062321.40059-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_hw.c | 3 ++- drivers/net/ethernet/wangxun/libwx/wx_type.h | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index 1e2713f0c921..b37d6cfbfbe9 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -2427,7 +2427,8 @@ int wx_sw_init(struct wx *wx) wx->oem_svid = pdev->subsystem_vendor; wx->oem_ssid = pdev->subsystem_device; wx->bus.device = PCI_SLOT(pdev->devfn); - wx->bus.func = PCI_FUNC(pdev->devfn); + wx->bus.func = FIELD_GET(WX_CFG_PORT_ST_LANID, + rd32(wx, WX_CFG_PORT_ST)); if (wx->oem_svid == PCI_VENDOR_ID_WANGXUN || pdev->is_virtfn) { diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index d89b9b8a0a2c..2f8319e03182 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -97,6 +97,8 @@ #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ +#define WX_CFG_PORT_ST 0x14404 +#define WX_CFG_PORT_ST_LANID GENMASK(9, 8) #define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4)) #define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */ @@ -557,8 +559,6 @@ enum WX_MSCA_CMD_value { #define TXD_USE_COUNT(S) DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD) #define DESC_NEEDED (MAX_SKB_FRAGS + 4) -#define WX_CFG_PORT_ST 0x14404 - /******************* Receive Descriptor bit definitions **********************/ #define WX_RXD_STAT_DD BIT(0) /* Done */ #define WX_RXD_STAT_EOP BIT(1) /* End of Packet */ From 4d6ec3a7932ca5b168426f7b5b40abab2b41d2da Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 5 Nov 2025 11:47:16 +0800 Subject: [PATCH 0766/1075] net: wan: framer: pef2256: Switch to devm_mfd_add_devices() The driver calls mfd_add_devices() but fails to call mfd_remove_devices() in error paths after successful MFD device registration and in the remove function. This leads to resource leaks where MFD child devices are not properly unregistered. Replace mfd_add_devices with devm_mfd_add_devices to automatically manage the device resources. Fixes: c96e976d9a05 ("net: wan: framer: Add support for the Lantiq PEF2256 framer") Suggested-by: Herve Codina Signed-off-by: Haotian Zhang Acked-by: Herve Codina Link: https://patch.msgid.link/20251105034716.662-1-vulab@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/wan/framer/pef2256/pef2256.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/framer/pef2256/pef2256.c b/drivers/net/wan/framer/pef2256/pef2256.c index c5501826db1e..c058cc79137d 100644 --- a/drivers/net/wan/framer/pef2256/pef2256.c +++ b/drivers/net/wan/framer/pef2256/pef2256.c @@ -648,7 +648,8 @@ static int pef2256_add_audio_devices(struct pef2256 *pef2256) audio_devs[i].id = i; } - ret = mfd_add_devices(pef2256->dev, 0, audio_devs, count, NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, audio_devs, count, + NULL, 0, NULL); kfree(audio_devs); return ret; } @@ -822,8 +823,8 @@ static int pef2256_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pef2256); - ret = mfd_add_devices(pef2256->dev, 0, pef2256_devs, - ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); + ret = devm_mfd_add_devices(pef2256->dev, 0, pef2256_devs, + ARRAY_SIZE(pef2256_devs), NULL, 0, NULL); if (ret) { dev_err(pef2256->dev, "add devices failed (%d)\n", ret); return ret; From 59b0afd01b2ce353ab422ea9c8375b03db313a21 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 23:09:34 +0800 Subject: [PATCH 0767/1075] crypto: hisilicon/qm - Fix device reference leak in qm_get_qos_value The qm_get_qos_value() function calls bus_find_device_by_name() which increases the device reference count, but fails to call put_device() to balance the reference count and lead to a device reference leak. Add put_device() calls in both the error path and success path to properly balance the reference count. Found via static analysis. Fixes: 22d7a6c39cab ("crypto: hisilicon/qm - add pci bdf number check") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Reviewed-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index a5b96adf2d1e..3b391a146635 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -3871,10 +3871,12 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, pdev = container_of(dev, struct pci_dev, dev); if (pci_physfn(pdev) != qm->pdev) { pci_err(qm->pdev, "the pdev input does not match the pf!\n"); + put_device(dev); return -EINVAL; } *fun_index = pdev->devfn; + put_device(dev); return 0; } From c379b745e12a99f0a54bafaaf75fc710614511ce Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 3 Nov 2025 13:24:15 +0100 Subject: [PATCH 0768/1075] slab: prevent infinite loop in kmalloc_nolock() with debugging In review of a followup work, Harry noticed a potential infinite loop. Upon closed inspection, it already exists for kmalloc_nolock() on a cache with debugging enabled, since commit af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") When alloc_single_from_new_slab() fails to trylock node list_lock, we keep retrying to get partial slab or allocate a new slab. If we indeed interrupted somebody holding the list_lock, the trylock fill fail deterministically and we end up allocating and defer-freeing slabs indefinitely with no progress. To fix it, fail the allocation if spinning is not allowed. This is acceptable in the restricted context of kmalloc_nolock(), especially with debugging enabled. Reported-by: Harry Yoo Closes: https://lore.kernel.org/all/aQLqZjjq1SPD3Fml@hyeyoo/ Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Acked-by: Alexei Starovoitov Reviewed-by: Harry Yoo Link: https://patch.msgid.link/20251103-fix-nolock-loop-v1-1-6e2b3e82b9da@suse.cz Signed-off-by: Vlastimil Babka --- mm/slub.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index d4367f25b20d..f1a5373eee7b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4666,8 +4666,12 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, if (kmem_cache_debug(s)) { freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); - if (unlikely(!freelist)) + if (unlikely(!freelist)) { + /* This could cause an endless loop. Fail instead. */ + if (!allow_spin) + return NULL; goto new_objects; + } if (s->flags & SLAB_STORE_USER) set_track(s, freelist, TRACK_ALLOC, addr, From d8a823c6f04ef03e3bd7249d2e796da903e7238d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 4 Nov 2025 05:43:01 -0500 Subject: [PATCH 0769/1075] xfs: free xfs_busy_extents structure when no RT extents are queued kmemleak occasionally reports leaking xfs_busy_extents structure from xfs_scrub calls after running xfs/528 (but attributed to following tests), which seems to be caused by not freeing the xfs_busy_extents structure when tr.queued is 0 and xfs_trim_rtgroup_extents breaks out of the main loop. Free the structure in this case. Fixes: a3315d11305f ("xfs: use rtgroup busy extent list for FITRIM") Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_discard.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index ee49f20875af..6917de832191 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -726,8 +726,10 @@ xfs_trim_rtgroup_extents( break; } - if (!tr.queued) + if (!tr.queued) { + kfree(tr.extents); break; + } /* * We hand the extent list to the discard function here so the From 82420bd4e17bdaba8453fbf9e10c58c9ed0c9727 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 6 Nov 2025 11:46:45 +0100 Subject: [PATCH 0770/1075] ALSA: hda/hdmi: Fix breakage at probing nvhdmi-mcp driver After restructuring and splitting the HDMI codec driver code, each HDMI codec driver contains the own build_controls and build_pcms ops. A copy-n-paste error put the wrong entries for nvhdmi-mcp driver; both build_controls and build_pcms are swapped. Unfortunately both callbacks have the very same form, and the compiler didn't complain it, either. This resulted in a NULL dereference because the PCM instance hasn't been initialized at calling the build_controls callback. Fix it by passing the proper entries. Fixes: ad781b550f9a ("ALSA: hda/hdmi: Rewrite to new probe method") Cc: Link: https://bugzilla.kernel.org/show_bug.cgi?id=220743 Link: https://patch.msgid.link/20251106104647.25805-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/hda/codecs/hdmi/nvhdmi-mcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/hda/codecs/hdmi/nvhdmi-mcp.c b/sound/hda/codecs/hdmi/nvhdmi-mcp.c index 8fd8d76fa72f..1c5fdfe872f2 100644 --- a/sound/hda/codecs/hdmi/nvhdmi-mcp.c +++ b/sound/hda/codecs/hdmi/nvhdmi-mcp.c @@ -350,8 +350,8 @@ static int nvhdmi_mcp_probe(struct hda_codec *codec, static const struct hda_codec_ops nvhdmi_mcp_codec_ops = { .probe = nvhdmi_mcp_probe, .remove = snd_hda_hdmi_simple_remove, - .build_controls = nvhdmi_mcp_build_pcms, - .build_pcms = nvhdmi_mcp_build_controls, + .build_pcms = nvhdmi_mcp_build_pcms, + .build_controls = nvhdmi_mcp_build_controls, .init = nvhdmi_mcp_init, .unsol_event = snd_hda_hdmi_simple_unsol_event, }; From 956dfda6a70885f18c0f8236a461aa2bc4f556ad Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Thu, 30 Oct 2025 11:27:55 +0800 Subject: [PATCH 0771/1075] sched/fair: Prevent cfs_rq from being unthrottled with zero runtime_remaining When a cfs_rq is to be throttled, its limbo list should be empty and that's why there is a warn in tg_throttle_down() for non empty cfs_rq->throttled_limbo_list. When running a test with the following hierarchy: root / \ A* ... / | \ ... B / \ C* where both A and C have quota settings, that warn on non empty limbo list is triggered for a cfs_rq of C, let's call it cfs_rq_c(and ignore the cpu part of the cfs_rq for the sake of simpler representation). Debug showed it happened like this: Task group C is created and quota is set, so in tg_set_cfs_bandwidth(), cfs_rq_c is initialized with runtime_enabled set, runtime_remaining equals to 0 and *unthrottled*. Before any tasks are enqueued to cfs_rq_c, *multiple* throttled tasks can migrate to cfs_rq_c (e.g., due to task group changes). When enqueue_task_fair(cfs_rq_c, throttled_task) is called and cfs_rq_c is in a throttled hierarchy (e.g., A is throttled), these throttled tasks are directly placed into cfs_rq_c's limbo list by enqueue_throttled_task(). Later, when A is unthrottled, tg_unthrottle_up(cfs_rq_c) enqueues these tasks. The first enqueue triggers check_enqueue_throttle(), and with zero runtime_remaining, cfs_rq_c can be throttled in throttle_cfs_rq() if it can't get more runtime and enters tg_throttle_down(), where the warning is hit due to remaining tasks in the limbo list. I think it's a chaos to trigger throttle on unthrottle path, the status of a being unthrottled cfs_rq can be in a mixed state in the end, so fix this by granting 1ns to cfs_rq in tg_set_cfs_bandwidth(). This ensures cfs_rq_c has a positive runtime_remaining when initialized as unthrottled and cannot enter tg_unthrottle_up() with zero runtime_remaining. Also, update outdated comments in tg_throttle_down() since unthrottle_cfs_rq() is no longer called with zero runtime_remaining. While at it, remove a redundant assignment to se in tg_throttle_down(). Fixes: e1fad12dcb66 ("sched/fair: Switch to task based throttle model") Reviewed-By: Benjamin Segall Suggested-by: Benjamin Segall Signed-off-by: Aaron Lu Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: K Prateek Nayak Tested-by: K Prateek Nayak Tested-by: Hao Jia Link: https://patch.msgid.link/20251030032755.560-1-ziqianlu@bytedance.com --- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f1ebf67b48e2..f754a60de848 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9606,7 +9606,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, guard(rq_lock_irq)(rq); cfs_rq->runtime_enabled = runtime_enabled; - cfs_rq->runtime_remaining = 0; + cfs_rq->runtime_remaining = 1; if (cfs_rq->throttled) unthrottle_cfs_rq(cfs_rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 25970dbbb279..5b752324270b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6024,20 +6024,17 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)]; /* - * It's possible we are called with !runtime_remaining due to things - * like user changed quota setting(see tg_set_cfs_bandwidth()) or async - * unthrottled us with a positive runtime_remaining but other still - * running entities consumed those runtime before we reached here. + * It's possible we are called with runtime_remaining < 0 due to things + * like async unthrottled us with a positive runtime_remaining but other + * still running entities consumed those runtime before we reached here. * - * Anyway, we can't unthrottle this cfs_rq without any runtime remaining - * because any enqueue in tg_unthrottle_up() will immediately trigger a - * throttle, which is not supposed to happen on unthrottle path. + * We can't unthrottle this cfs_rq without any runtime remaining because + * any enqueue in tg_unthrottle_up() will immediately trigger a throttle, + * which is not supposed to happen on unthrottle path. */ if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) return; - se = cfs_rq->tg->se[cpu_of(rq)]; - cfs_rq->throttled = 0; update_rq_clock(rq); From 4cb5ac2626b5704ed712ac1d46b9d89fdfc12c5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jul 2025 16:29:46 +0200 Subject: [PATCH 0772/1075] futex: Optimize per-cpu reference counting Shrikanth noted that the per-cpu reference counter was still some 10% slower than the old immutable option (which removes the reference counting entirely). Further optimize the per-cpu reference counter by: - switching from RCU to preempt; - using __this_cpu_*() since we now have preempt disabled; - switching from smp_load_acquire() to READ_ONCE(). This is all safe because disabling preemption inhibits the RCU grace period exactly like rcu_read_lock(). Having preemption disabled allows using __this_cpu_*() provided the only access to the variable is in task context -- which is the case here. Furthermore, since we know changing fph->state to FR_ATOMIC demands a full RCU grace period we can rely on the implied smp_mb() from that to replace the acquire barrier(). This is very similar to the percpu_down_read_internal() fast-path. The reason this is significant for PowerPC is that it uses the generic this_cpu_*() implementation which relies on local_irq_disable() (the x86 implementation relies on it being a single memop instruction to be IRQ-safe). Switching to preempt_disable() and __this_cpu*() avoids this IRQ state swizzling. Also, PowerPC needs LWSYNC for the ACQUIRE barrier, not having to use explicit barriers safes a bunch. Combined this reduces the performance gap by half, down to some 5%. Fixes: 760e6f7befba ("futex: Remove support for IMMUTABLE") Reported-by: Shrikanth Hegde Tested-by: Shrikanth Hegde Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20251106092929.GR4067720@noisy.programming.kicks-ass.net --- kernel/futex/core.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 125804fbb5cb..2e77a6e5c865 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -1680,10 +1680,10 @@ static bool futex_ref_get(struct futex_private_hash *fph) { struct mm_struct *mm = fph->mm; - guard(rcu)(); + guard(preempt)(); - if (smp_load_acquire(&fph->state) == FR_PERCPU) { - this_cpu_inc(*mm->futex_ref); + if (READ_ONCE(fph->state) == FR_PERCPU) { + __this_cpu_inc(*mm->futex_ref); return true; } @@ -1694,10 +1694,10 @@ static bool futex_ref_put(struct futex_private_hash *fph) { struct mm_struct *mm = fph->mm; - guard(rcu)(); + guard(preempt)(); - if (smp_load_acquire(&fph->state) == FR_PERCPU) { - this_cpu_dec(*mm->futex_ref); + if (READ_ONCE(fph->state) == FR_PERCPU) { + __this_cpu_dec(*mm->futex_ref); return false; } From b750f5a9d64df6cfe9103c7feb7314694318818d Mon Sep 17 00:00:00 2001 From: LiangCheng Wang Date: Tue, 28 Oct 2025 10:55:38 +0800 Subject: [PATCH 0773/1075] drm/tiny: pixpaper: add explicit dependency on MMU The DRM_GEM_SHMEM_HELPER helper requires MMU enabled because it uses vmf_insert_pfn() in its mmap implementation. On NOMMU configurations (e.g. some RISC-V randconfig builds), this symbol is unavailable and selecting DRM_GEM_SHMEM_HELPER causes a modpost undefined reference: ERROR: modpost: "vmf_insert_pfn" [drivers/gpu/drm/drm_shmem_helper.ko] undefined! Normally, Kconfig prevents this helper from being selected when CONFIG_MMU=n. However, in some randconfig builds (such as those used by 0day CI), select statements can override unmet dependencies, triggering the issue. Add an explicit dependency on MMU to DRM_PIXPAPER to prevent this. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202510280213.0rlYA4T3-lkp@intel.com/ Fixes: 0c4932f6ddf8 ("drm/tiny: pixpaper: Fix missing dependency on DRM_GEM_SHMEM_HELPER") Acked-by: Thomas Zimmermann Signed-off-by: LiangCheng Wang Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20251028-bar-v1-1-edfbd13fafff@gmail.com --- drivers/gpu/drm/tiny/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig index 7d9e85e932d7..f0e72d4b6a47 100644 --- a/drivers/gpu/drm/tiny/Kconfig +++ b/drivers/gpu/drm/tiny/Kconfig @@ -85,6 +85,7 @@ config DRM_PANEL_MIPI_DBI config DRM_PIXPAPER tristate "DRM support for PIXPAPER display panels" depends on DRM && SPI + depends on MMU select DRM_CLIENT_SELECTION select DRM_GEM_SHMEM_HELPER select DRM_KMS_HELPER From 6b6eddc63ce871897d3a5bc4f8f593e698aef104 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 5 Nov 2025 14:22:46 +0800 Subject: [PATCH 0774/1075] ASoC: cs4271: Fix regulator leak on probe failure The probe function enables regulators at the beginning but fails to disable them in its error handling path. If any operation after enabling the regulators fails, the probe will exit with an error, leaving the regulators permanently enabled, which could lead to a resource leak. Add a proper error handling path to call regulator_bulk_disable() before returning an error. Fixes: 9a397f473657 ("ASoC: cs4271: add regulator consumer support") Signed-off-by: Haotian Zhang Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20251105062246.1955-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/codecs/cs4271.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index 6a3cca3d26c7..ead447a5da7f 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -581,17 +581,17 @@ static int cs4271_component_probe(struct snd_soc_component *component) ret = regcache_sync(cs4271->regmap); if (ret < 0) - return ret; + goto err_disable_regulator; ret = regmap_update_bits(cs4271->regmap, CS4271_MODE2, CS4271_MODE2_PDN | CS4271_MODE2_CPEN, CS4271_MODE2_PDN | CS4271_MODE2_CPEN); if (ret < 0) - return ret; + goto err_disable_regulator; ret = regmap_update_bits(cs4271->regmap, CS4271_MODE2, CS4271_MODE2_PDN, 0); if (ret < 0) - return ret; + goto err_disable_regulator; /* Power-up sequence requires 85 uS */ udelay(85); @@ -601,6 +601,10 @@ static int cs4271_component_probe(struct snd_soc_component *component) CS4271_MODE2_MUTECAEQUB); return 0; + +err_disable_regulator: + regulator_bulk_disable(ARRAY_SIZE(cs4271->supplies), cs4271->supplies); + return ret; } static void cs4271_component_remove(struct snd_soc_component *component) From 1a58d865f423f4339edf59053e496089075fa950 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 29 Oct 2025 15:17:58 +0800 Subject: [PATCH 0775/1075] ASoC: sdw_utils: fix device reference leak in is_sdca_endpoint_present() The bus_find_device_by_name() function returns a device pointer with an incremented reference count, but the original code was missing put_device() calls in some return paths, leading to reference count leaks. Fix this by ensuring put_device() is called before function exit after bus_find_device_by_name() succeeds This follows the same pattern used elsewhere in the kernel where bus_find_device_by_name() is properly paired with put_device(). Found via static analysis and code review. Fixes: 4f8ef33dd44a ("ASoC: soc_sdw_utils: skip the endpoint that doesn't present") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Link: https://patch.msgid.link/20251029071804.8425-1-linmq006@gmail.com Signed-off-by: Mark Brown --- sound/soc/sdw_utils/soc_sdw_utils.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index f7c8c16308de..3848c7df1916 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -1277,7 +1277,7 @@ static int is_sdca_endpoint_present(struct device *dev, struct sdw_slave *slave; struct device *sdw_dev; const char *sdw_codec_name; - int i; + int ret, i; dlc = kzalloc(sizeof(*dlc), GFP_KERNEL); if (!dlc) @@ -1307,13 +1307,16 @@ static int is_sdca_endpoint_present(struct device *dev, } slave = dev_to_sdw_dev(sdw_dev); - if (!slave) - return -EINVAL; + if (!slave) { + ret = -EINVAL; + goto put_device; + } /* Make sure BIOS provides SDCA properties */ if (!slave->sdca_data.interface_revision) { dev_warn(&slave->dev, "SDCA properties not found in the BIOS\n"); - return 1; + ret = 1; + goto put_device; } for (i = 0; i < slave->sdca_data.num_functions; i++) { @@ -1322,7 +1325,8 @@ static int is_sdca_endpoint_present(struct device *dev, if (dai_type == dai_info->dai_type) { dev_dbg(&slave->dev, "DAI type %d sdca function %s found\n", dai_type, slave->sdca_data.function[i].name); - return 1; + ret = 1; + goto put_device; } } @@ -1330,7 +1334,11 @@ static int is_sdca_endpoint_present(struct device *dev, "SDCA device function for DAI type %d not supported, skip endpoint\n", dai_info->dai_type); - return 0; + ret = 0; + +put_device: + put_device(sdw_dev); + return ret; } int asoc_sdw_parse_sdw_endpoints(struct snd_soc_card *card, From 84f5526e4dce0a44d050ceb1b1bf21d43016d91b Mon Sep 17 00:00:00 2001 From: Niranjan H Y Date: Thu, 30 Oct 2025 20:46:37 +0530 Subject: [PATCH 0776/1075] ASoC: tas2783A: Fix issues in firmware parsing During firmware download, if the size of the firmware is too small, it wrongly assumes the firmware download is successful. If there is size mismatch with chunk's header, invalid memory is accessed. Fix these issues by throwing error during these cases. Fixes: 4cc9bd8d7b32 (ASoc: tas2783A: Add soundwire based codec driver) Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202510291226.2R3fbYNh-lkp@intel.com/ Signed-off-by: Niranjan H Y Link: https://patch.msgid.link/20251030151637.566-1-niranjan.hy@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2783-sdw.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2783-sdw.c b/sound/soc/codecs/tas2783-sdw.c index 1fb4227b711e..e273b80d033e 100644 --- a/sound/soc/codecs/tas2783-sdw.c +++ b/sound/soc/codecs/tas2783-sdw.c @@ -762,10 +762,17 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) goto out; } - mutex_lock(&tas_dev->pde_lock); img_sz = fmw->size; buf = fmw->data; offset += FW_DL_OFFSET; + if (offset >= (img_sz - FW_FL_HDR)) { + dev_err(tas_dev->dev, + "firmware is too small"); + ret = -EINVAL; + goto out; + } + + mutex_lock(&tas_dev->pde_lock); while (offset < (img_sz - FW_FL_HDR)) { memset(&hdr, 0, sizeof(hdr)); offset += read_header(&buf[offset], &hdr); @@ -776,6 +783,14 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) /* size also includes the header */ file_blk_size = hdr.length - FW_FL_HDR; + /* make sure that enough data is there */ + if (offset + file_blk_size > img_sz) { + ret = -EINVAL; + dev_err(tas_dev->dev, + "corrupt firmware file"); + break; + } + switch (hdr.file_id) { case 0: ret = sdw_nwrite_no_pm(tas_dev->sdw_peripheral, @@ -808,7 +823,8 @@ static void tas2783_fw_ready(const struct firmware *fmw, void *context) break; } mutex_unlock(&tas_dev->pde_lock); - tas2783_update_calibdata(tas_dev); + if (!ret) + tas2783_update_calibdata(tas_dev); out: if (!ret) From 86d57d9c07d54e8cb385ffe800930816ccdba0c1 Mon Sep 17 00:00:00 2001 From: Robin Gong Date: Fri, 24 Oct 2025 13:53:20 +0800 Subject: [PATCH 0777/1075] spi: imx: keep dma request disabled before dma transfer setup Since sdma hardware configure postpone to transfer phase, have to disable dma request before dma transfer setup because there is a hardware limitation on sdma event enable(ENBLn) as below: "It is thus essential for the Arm platform to program them before any DMA request is triggered to the SDMA, otherwise an unpredictable combination of channels may be started." Signed-off-by: Carlos Song Signed-off-by: Robin Gong Link: https://patch.msgid.link/20251024055320.408482-1-carlos.song@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 155ddeb8fcd4..bbf1fd4fe1e9 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -519,9 +519,15 @@ static void mx51_ecspi_trigger(struct spi_imx_data *spi_imx) { u32 reg; - reg = readl(spi_imx->base + MX51_ECSPI_CTRL); - reg |= MX51_ECSPI_CTRL_XCH; - writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + if (spi_imx->usedma) { + reg = readl(spi_imx->base + MX51_ECSPI_DMA); + reg |= MX51_ECSPI_DMA_TEDEN | MX51_ECSPI_DMA_RXDEN; + writel(reg, spi_imx->base + MX51_ECSPI_DMA); + } else { + reg = readl(spi_imx->base + MX51_ECSPI_CTRL); + reg |= MX51_ECSPI_CTRL_XCH; + writel(reg, spi_imx->base + MX51_ECSPI_CTRL); + } } static void mx51_ecspi_disable(struct spi_imx_data *spi_imx) @@ -759,7 +765,6 @@ static void mx51_setup_wml(struct spi_imx_data *spi_imx) writel(MX51_ECSPI_DMA_RX_WML(spi_imx->wml - 1) | MX51_ECSPI_DMA_TX_WML(tx_wml) | MX51_ECSPI_DMA_RXT_WML(spi_imx->wml) | - MX51_ECSPI_DMA_TEDEN | MX51_ECSPI_DMA_RXDEN | MX51_ECSPI_DMA_RXTDEN, spi_imx->base + MX51_ECSPI_DMA); } @@ -1520,6 +1525,8 @@ static int spi_imx_dma_transfer(struct spi_imx_data *spi_imx, reinit_completion(&spi_imx->dma_tx_completion); dma_async_issue_pending(controller->dma_tx); + spi_imx->devtype_data->trigger(spi_imx); + transfer_timeout = spi_imx_calculate_timeout(spi_imx, transfer->len); /* Wait SDMA to finish the data transfer.*/ From d0164c161923ac303bd843e04ebe95cfd03c6e19 Mon Sep 17 00:00:00 2001 From: Sukrit Bhatnagar Date: Thu, 6 Nov 2025 14:28:51 +0900 Subject: [PATCH 0778/1075] KVM: VMX: Fix check for valid GVA on an EPT violation On an EPT violation, bit 7 of the exit qualification is set if the guest linear-address is valid. The derived page fault error code should not be checked for this bit. Fixes: f3009482512e ("KVM: VMX: Set PFERR_GUEST_{FINAL,PAGE}_MASK if and only if the GVA is valid") Cc: stable@vger.kernel.org Signed-off-by: Sukrit Bhatnagar Reviewed-by: Xiaoyao Li Link: https://patch.msgid.link/20251106052853.3071088-1-Sukrit.Bhatnagar@sony.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/vmx/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/common.h b/arch/x86/kvm/vmx/common.h index bc5ece76533a..412d0829d7a2 100644 --- a/arch/x86/kvm/vmx/common.h +++ b/arch/x86/kvm/vmx/common.h @@ -98,7 +98,7 @@ static inline int __vmx_handle_ept_violation(struct kvm_vcpu *vcpu, gpa_t gpa, error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK) ? PFERR_PRESENT_MASK : 0; - if (error_code & EPT_VIOLATION_GVA_IS_VALID) + if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID) error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; From 96baf482ca1f69f0da9d10a5bd8422c87ea9039e Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 4 Nov 2025 19:37:41 -0800 Subject: [PATCH 0779/1075] net: dsa: microchip: Fix reserved multicast address table programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KSZ9477/KSZ9897 and LAN937X families of switches use a reserved multicast address table for some specific forwarding with some multicast addresses, like the one used in STP. The hardware assumes the host port is the last port in KSZ9897 family and port 5 in LAN937X family. Most of the time this assumption is correct but not in other cases like KSZ9477. Originally the function just setups the first entry, but the others still need update, especially for one common multicast address that is used by PTP operation. LAN937x also uses different register bits when accessing the reserved table. Fixes: 457c182af597 ("net: dsa: microchip: generic access to ksz9477 static and reserved table") Signed-off-by: Tristram Ha Tested-by: Łukasz Majewski Link: https://patch.msgid.link/20251105033741.6455-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477.c | 98 +++++++++++++++++++++---- drivers/net/dsa/microchip/ksz9477_reg.h | 3 +- drivers/net/dsa/microchip/ksz_common.c | 4 + drivers/net/dsa/microchip/ksz_common.h | 2 + 4 files changed, 91 insertions(+), 16 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index d747ea1c41a7..5df8f153d511 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -1355,9 +1355,15 @@ void ksz9477_config_cpu_port(struct dsa_switch *ds) } } +#define RESV_MCAST_CNT 8 + +static u8 reserved_mcast_map[RESV_MCAST_CNT] = { 0, 1, 3, 16, 32, 33, 2, 17 }; + int ksz9477_enable_stp_addr(struct ksz_device *dev) { + u8 i, ports, update; const u32 *masks; + bool override; u32 data; int ret; @@ -1366,23 +1372,87 @@ int ksz9477_enable_stp_addr(struct ksz_device *dev) /* Enable Reserved multicast table */ ksz_cfg(dev, REG_SW_LUE_CTRL_0, SW_RESV_MCAST_ENABLE, true); - /* Set the Override bit for forwarding BPDU packet to CPU */ - ret = ksz_write32(dev, REG_SW_ALU_VAL_B, - ALU_V_OVERRIDE | BIT(dev->cpu_port)); - if (ret < 0) - return ret; + /* The reserved multicast address table has 8 entries. Each entry has + * a default value of which port to forward. It is assumed the host + * port is the last port in most of the switches, but that is not the + * case for KSZ9477 or maybe KSZ9897. For LAN937X family the default + * port is port 5, the first RGMII port. It is okay for LAN9370, a + * 5-port switch, but may not be correct for the other 8-port + * versions. It is necessary to update the whole table to forward to + * the right ports. + * Furthermore PTP messages can use a reserved multicast address and + * the host will not receive them if this table is not correct. + */ + for (i = 0; i < RESV_MCAST_CNT; i++) { + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_READ]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; - data = ALU_STAT_START | ALU_RESV_MCAST_ADDR | masks[ALU_STAT_WRITE]; + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; - ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); - if (ret < 0) - return ret; + ret = ksz_read32(dev, REG_SW_ALU_VAL_B, &data); + if (ret < 0) + return ret; - /* wait to be finished */ - ret = ksz9477_wait_alu_sta_ready(dev); - if (ret < 0) { - dev_err(dev->dev, "Failed to update Reserved Multicast table\n"); - return ret; + override = false; + ports = data & dev->port_mask; + switch (i) { + case 0: + case 6: + /* Change the host port. */ + update = BIT(dev->cpu_port); + override = true; + break; + case 2: + /* Change the host port. */ + update = BIT(dev->cpu_port); + break; + case 4: + case 5: + case 7: + /* Skip the host port. */ + update = dev->port_mask & ~BIT(dev->cpu_port); + break; + default: + update = ports; + break; + } + if (update != ports || override) { + data &= ~dev->port_mask; + data |= update; + /* Set Override bit to receive frame even when port is + * closed. + */ + if (override) + data |= ALU_V_OVERRIDE; + ret = ksz_write32(dev, REG_SW_ALU_VAL_B, data); + if (ret < 0) + return ret; + + data = reserved_mcast_map[i] << + dev->info->shifts[ALU_STAT_INDEX]; + data |= ALU_STAT_START | + masks[ALU_STAT_DIRECT] | + masks[ALU_RESV_MCAST_ADDR] | + masks[ALU_STAT_WRITE]; + ret = ksz_write32(dev, REG_SW_ALU_STAT_CTRL__4, data); + if (ret < 0) + return ret; + + /* wait to be finished */ + ret = ksz9477_wait_alu_sta_ready(dev); + if (ret < 0) + return ret; + } } return 0; diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index ff579920078e..61ea11e3338e 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -2,7 +2,7 @@ /* * Microchip KSZ9477 register definitions * - * Copyright (C) 2017-2024 Microchip Technology Inc. + * Copyright (C) 2017-2025 Microchip Technology Inc. */ #ifndef __KSZ9477_REGS_H @@ -397,7 +397,6 @@ #define ALU_RESV_MCAST_INDEX_M (BIT(6) - 1) #define ALU_STAT_START BIT(7) -#define ALU_RESV_MCAST_ADDR BIT(1) #define REG_SW_ALU_VAL_A 0x0420 diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a962055bfdbd..933ae8dc6337 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -808,6 +808,8 @@ static const u16 ksz9477_regs[] = { static const u32 ksz9477_masks[] = { [ALU_STAT_WRITE] = 0, [ALU_STAT_READ] = 1, + [ALU_STAT_DIRECT] = 0, + [ALU_RESV_MCAST_ADDR] = BIT(1), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; @@ -835,6 +837,8 @@ static const u8 ksz9477_xmii_ctrl1[] = { static const u32 lan937x_masks[] = { [ALU_STAT_WRITE] = 1, [ALU_STAT_READ] = 2, + [ALU_STAT_DIRECT] = BIT(3), + [ALU_RESV_MCAST_ADDR] = BIT(2), [P_MII_TX_FLOW_CTRL] = BIT(5), [P_MII_RX_FLOW_CTRL] = BIT(3), }; diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index a1eb39771bb9..c65188cd3c0a 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -294,6 +294,8 @@ enum ksz_masks { DYNAMIC_MAC_TABLE_TIMESTAMP, ALU_STAT_WRITE, ALU_STAT_READ, + ALU_STAT_DIRECT, + ALU_RESV_MCAST_ADDR, P_MII_TX_FLOW_CTRL, P_MII_RX_FLOW_CTRL, }; From 067bf016e99ad72aa4ff869d6dec1fd62a9c6202 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 5 Nov 2025 07:26:20 +0000 Subject: [PATCH 0780/1075] bonding: fix NULL pointer dereference in actor_port_prio setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Liang reported an issue where setting a slave’s actor_port_prio to predefined values such as 0, 255, or 65535 would cause a system crash. The problem occurs because in bond_opt_parse(), when the provided value matches a predefined table entry, the function returns that table entry, which does not contain slave information. Later, in bond_option_actor_port_prio_set(), calling bond_slave_get_rtnl() leads to a NULL pointer dereference. Since actor_port_prio is defined as a u16 and initialized to the default value of 255 in ad_initialize_port(), there is no need for the bond_actor_port_prio_tbl. Using the BOND_OPTFLAG_RAWVAL flag is sufficient. Fixes: 6b6dc81ee7e8 ("bonding: add support for per-port LACP actor priority") Reported-by: Liang Li Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251105072620.164841-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_options.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 495a87f2ea7c..384499c869b8 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -225,13 +225,6 @@ static const struct bond_opt_value bond_ad_actor_sys_prio_tbl[] = { { NULL, -1, 0}, }; -static const struct bond_opt_value bond_actor_port_prio_tbl[] = { - { "minval", 0, BOND_VALFLAG_MIN}, - { "maxval", 65535, BOND_VALFLAG_MAX}, - { "default", 255, BOND_VALFLAG_DEFAULT}, - { NULL, -1, 0}, -}; - static const struct bond_opt_value bond_ad_user_port_key_tbl[] = { { "minval", 0, BOND_VALFLAG_MIN | BOND_VALFLAG_DEFAULT}, { "maxval", 1023, BOND_VALFLAG_MAX}, @@ -497,7 +490,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .id = BOND_OPT_ACTOR_PORT_PRIO, .name = "actor_port_prio", .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), - .values = bond_actor_port_prio_tbl, + .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_actor_port_prio_set, }, [BOND_OPT_AD_ACTOR_SYSTEM] = { From 9065b968752334f972e0d48e50c4463a172fc2a7 Mon Sep 17 00:00:00 2001 From: Nicolas Escande Date: Tue, 4 Nov 2025 09:39:57 +0100 Subject: [PATCH 0781/1075] wifi: ath11k: zero init info->status in wmi_process_mgmt_tx_comp() When reporting tx completion using ieee80211_tx_status_xxx() family of functions, the status part of the struct ieee80211_tx_info nested in the skb is used to report things like transmit rates & retry count to mac80211 On the TX data path, this is correctly memset to 0 before calling ieee80211_tx_status_ext(), but on the tx mgmt path this was not done. This leads to mac80211 treating garbage values as valid transmit counters (like tx retries for example) and accounting them as real statistics that makes their way to userland via station dump. The same issue was resolved in ath12k by commit 9903c0986f78 ("wifi: ath12k: Add memset and update default rate value in wmi tx completion") Tested-on: QCN9074 PCI WLAN.HK.2.9.0.1-01977-QCAHKSWPL_SILICONZ-1 Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices") Signed-off-by: Nicolas Escande Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20251104083957.717825-1-nico.escande@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 0491e3fd6b5e..e3b444333dee 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -5961,6 +5961,9 @@ static int wmi_process_mgmt_tx_comp(struct ath11k *ar, dma_unmap_single(ar->ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); + memset(&info->status, 0, sizeof(info->status)); + info->status.rates[0].idx = -1; + if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && !tx_compl_param->status) { info->flags |= IEEE80211_TX_STAT_ACK; From 0216721ce71252f60d89af49c8dff613358058d3 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 5 Nov 2025 08:49:55 +0100 Subject: [PATCH 0782/1075] lan966x: Fix sleeping in atomic context The following warning was seen when we try to connect using ssh to the device. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 104, name: dropbear preempt_count: 1, expected: 0 INFO: lockdep is turned off. CPU: 0 UID: 0 PID: 104 Comm: dropbear Tainted: G W 6.18.0-rc2-00399-g6f1ab1b109b9-dirty #530 NONE Tainted: [W]=WARN Hardware name: Generic DT based system Call trace: unwind_backtrace from show_stack+0x10/0x14 show_stack from dump_stack_lvl+0x7c/0xac dump_stack_lvl from __might_resched+0x16c/0x2b0 __might_resched from __mutex_lock+0x64/0xd34 __mutex_lock from mutex_lock_nested+0x1c/0x24 mutex_lock_nested from lan966x_stats_get+0x5c/0x558 lan966x_stats_get from dev_get_stats+0x40/0x43c dev_get_stats from dev_seq_printf_stats+0x3c/0x184 dev_seq_printf_stats from dev_seq_show+0x10/0x30 dev_seq_show from seq_read_iter+0x350/0x4ec seq_read_iter from seq_read+0xfc/0x194 seq_read from proc_reg_read+0xac/0x100 proc_reg_read from vfs_read+0xb0/0x2b0 vfs_read from ksys_read+0x6c/0xec ksys_read from ret_fast_syscall+0x0/0x1c Exception stack(0xf0b11fa8 to 0xf0b11ff0) 1fa0: 00000001 00001000 00000008 be9048d8 00001000 00000001 1fc0: 00000001 00001000 00000008 00000003 be905920 0000001e 00000000 00000001 1fe0: 0005404c be9048c0 00018684 b6ec2cd8 It seems that we are using a mutex in a atomic context which is wrong. Change the mutex with a spinlock. Fixes: 12c2d0a5b8e2 ("net: lan966x: add ethtool configuration and statistics") Signed-off-by: Horatiu Vultur Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251105074955.1766792-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- .../microchip/lan966x/lan966x_ethtool.c | 18 +++++++++--------- .../ethernet/microchip/lan966x/lan966x_main.c | 2 -- .../ethernet/microchip/lan966x/lan966x_main.h | 4 ++-- .../microchip/lan966x/lan966x_vcap_impl.c | 8 ++++---- 4 files changed, 15 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c index 2474dfd330f4..fe4e61405284 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ethtool.c @@ -294,7 +294,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) { int i, j; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); for (i = 0; i < lan966x->num_phys_ports; i++) { uint idx = i * lan966x->num_stats; @@ -310,7 +310,7 @@ static void lan966x_stats_update(struct lan966x *lan966x) } } - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static int lan966x_get_sset_count(struct net_device *dev, int sset) @@ -365,7 +365,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); mac_stats->FramesTransmittedOK = lan966x->stats[idx + SYS_COUNT_TX_UC] + @@ -416,7 +416,7 @@ static void lan966x_get_eth_mac_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_RX_LONG] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_LONG]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static const struct ethtool_rmon_hist_range lan966x_rmon_ranges[] = { @@ -442,7 +442,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); rmon_stats->undersize_pkts = lan966x->stats[idx + SYS_COUNT_RX_SHORT] + @@ -500,7 +500,7 @@ static void lan966x_get_eth_rmon_stats(struct net_device *dev, lan966x->stats[idx + SYS_COUNT_TX_SZ_1024_1526] + lan966x->stats[idx + SYS_COUNT_TX_PMAC_SZ_1024_1526]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); *ranges = lan966x_rmon_ranges; } @@ -603,7 +603,7 @@ void lan966x_stats_get(struct net_device *dev, idx = port->chip_port * lan966x->num_stats; - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); stats->rx_bytes = lan966x->stats[idx + SYS_COUNT_RX_OCT] + lan966x->stats[idx + SYS_COUNT_RX_PMAC_OCT]; @@ -685,7 +685,7 @@ void lan966x_stats_get(struct net_device *dev, stats->collisions = lan966x->stats[idx + SYS_COUNT_TX_COL]; - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } int lan966x_stats_init(struct lan966x *lan966x) @@ -701,7 +701,7 @@ int lan966x_stats_init(struct lan966x *lan966x) return -ENOMEM; /* Init stats worker */ - mutex_init(&lan966x->stats_lock); + spin_lock_init(&lan966x->stats_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", dev_name(lan966x->dev)); lan966x->stats_queue = create_singlethread_workqueue(queue_name); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 7001584f1b7a..47752d3fde0b 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -1261,7 +1261,6 @@ static int lan966x_probe(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); debugfs_remove_recursive(lan966x->debugfs_root); @@ -1279,7 +1278,6 @@ static void lan966x_remove(struct platform_device *pdev) cancel_delayed_work_sync(&lan966x->stats_work); destroy_workqueue(lan966x->stats_queue); - mutex_destroy(&lan966x->stats_lock); lan966x_mac_purge_entries(lan966x); lan966x_mdb_deinit(lan966x); diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index 4f75f0688369..eea286c29474 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -295,8 +295,8 @@ struct lan966x { const struct lan966x_stat_layout *stats_layout; u32 num_stats; - /* workqueue for reading stats */ - struct mutex stats_lock; + /* lock for reading stats */ + spinlock_t stats_lock; u64 *stats; struct delayed_work stats_work; struct workqueue_struct *stats_queue; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index a1471e38d118..2a37fc1ba4bc 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -403,11 +403,11 @@ static void lan966x_es0_read_esdx_counter(struct lan966x *lan966x, u32 counter; id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); counter = lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)) + lan_rd(lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); if (counter) admin->cache.counter = counter; } @@ -417,14 +417,14 @@ static void lan966x_es0_write_esdx_counter(struct lan966x *lan966x, { id = id & 0xff; /* counter limit */ - mutex_lock(&lan966x->stats_lock); + spin_lock(&lan966x->stats_lock); lan_wr(SYS_STAT_CFG_STAT_VIEW_SET(id), lan966x, SYS_STAT_CFG); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_BYTES)); lan_wr(admin->cache.counter, lan966x, SYS_CNT(LAN966X_STAT_ESDX_GRN_PKTS)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_BYTES)); lan_wr(0, lan966x, SYS_CNT(LAN966X_STAT_ESDX_YEL_PKTS)); - mutex_unlock(&lan966x->stats_lock); + spin_unlock(&lan966x->stats_lock); } static void lan966x_vcap_cache_write(struct net_device *dev, From 8dca36978aa80bab9d4da130c211db75c9e00048 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:18 +0200 Subject: [PATCH 0783/1075] net: bridge: fix use-after-free due to MST port state bypass syzbot reported[1] a use-after-free when deleting an expired fdb. It is due to a race condition between learning still happening and a port being deleted, after all its fdbs have been flushed. The port's state has been toggled to disabled so no learning should happen at that time, but if we have MST enabled, it will bypass the port's state, that together with VLAN filtering disabled can lead to fdb learning at a time when it shouldn't happen while the port is being deleted. VLAN filtering must be disabled because we flush the port VLANs when it's being deleted which will stop learning. This fix adds a check for the port's vlan group which is initialized to NULL when the port is getting deleted, that avoids the port state bypass. When MST is enabled there would be a minimal new overhead in the fast-path because the port's vlan group pointer is cache-hot. [1] https://syzkaller.appspot.com/bug?extid=dd280197f0f7ab3917be Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: syzbot+dd280197f0f7ab3917be@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/69088ffa.050a0220.29fc44.003d.GAE@google.com/ Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-2-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_forward.c | 2 +- net/bridge/br_input.c | 4 ++-- net/bridge/br_private.h | 8 +++++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 870bdf2e082c..dea09096ad0f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -25,7 +25,7 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - (br_mst_is_enabled(p->br) || p->state == BR_STATE_FORWARDING) && + (br_mst_is_enabled(p) || p->state == BR_STATE_FORWARDING) && br_allowed_egress(vg, skb) && nbp_switchdev_allowed_egress(p, skb) && !br_skb_isolated(p, skb); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 67b4c905e49a..777fa869c1a1 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -94,7 +94,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb br = p->br; - if (br_mst_is_enabled(br)) { + if (br_mst_is_enabled(p)) { state = BR_STATE_FORWARDING; } else { if (p->state == BR_STATE_DISABLED) { @@ -429,7 +429,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; forward: - if (br_mst_is_enabled(p->br)) + if (br_mst_is_enabled(p)) goto defer_stp_filtering; switch (p->state) { diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 16be5d250402..b571d6f61389 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1935,10 +1935,12 @@ static inline bool br_vlan_state_allowed(u8 state, bool learn_allow) /* br_mst.c */ #ifdef CONFIG_BRIDGE_VLAN_FILTERING DECLARE_STATIC_KEY_FALSE(br_mst_used); -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { + /* check the port's vlan group to avoid racing with port deletion */ return static_branch_unlikely(&br_mst_used) && - br_opt_get(br, BROPT_MST_ENABLED); + br_opt_get(p->br, BROPT_MST_ENABLED) && + rcu_access_pointer(p->vlgrp); } int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, @@ -1953,7 +1955,7 @@ int br_mst_fill_info(struct sk_buff *skb, int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); #else -static inline bool br_mst_is_enabled(struct net_bridge *br) +static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { return false; } From ee87c63f9b2a418f698d79c2991347e31a7d2c27 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Wed, 5 Nov 2025 13:19:19 +0200 Subject: [PATCH 0784/1075] net: bridge: fix MST static key usage As Ido pointed out, the static key usage in MST is buggy and should use inc/dec instead of enable/disable because we can have multiple bridges with MST enabled which means a single bridge can disable MST for all. Use static_branch_inc/dec to avoid that. When destroying a bridge decrement the key if MST was enabled. Fixes: ec7328b59176 ("net: bridge: mst: Multiple Spanning Tree (MST) mode") Reported-by: Ido Schimmel Closes: https://lore.kernel.org/netdev/20251104120313.1306566-1-razor@blackwall.org/T/#m6888d87658f94ed1725433940f4f4ebb00b5a68b Signed-off-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20251105111919.1499702-3-razor@blackwall.org Signed-off-by: Jakub Kicinski --- net/bridge/br_if.c | 1 + net/bridge/br_mst.c | 10 ++++++++-- net/bridge/br_private.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 98c5b9c3145f..ca3a637d7cca 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -386,6 +386,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_mst_uninit(br); br_recalculate_neigh_suppress_enabled(br); br_fdb_delete_by_port(br, NULL, 0, 1); diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3f24b4ee49c2..43a300ae6bfa 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -22,6 +22,12 @@ bool br_mst_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_mst_enabled); +void br_mst_uninit(struct net_bridge *br) +{ + if (br_opt_get(br, BROPT_MST_ENABLED)) + static_branch_dec(&br_mst_used); +} + int br_mst_get_info(const struct net_device *dev, u16 msti, unsigned long *vids) { const struct net_bridge_vlan_group *vg; @@ -225,9 +231,9 @@ int br_mst_set_enabled(struct net_bridge *br, bool on, return err; if (on) - static_branch_enable(&br_mst_used); + static_branch_inc(&br_mst_used); else - static_branch_disable(&br_mst_used); + static_branch_dec(&br_mst_used); br_opt_toggle(br, BROPT_MST_ENABLED, on); return 0; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b571d6f61389..7280c4e9305f 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1954,6 +1954,7 @@ int br_mst_fill_info(struct sk_buff *skb, const struct net_bridge_vlan_group *vg); int br_mst_process(struct net_bridge_port *p, const struct nlattr *mst_attr, struct netlink_ext_ack *extack); +void br_mst_uninit(struct net_bridge *br); #else static inline bool br_mst_is_enabled(const struct net_bridge_port *p) { @@ -1989,6 +1990,10 @@ static inline int br_mst_process(struct net_bridge_port *p, { return -EOPNOTSUPP; } + +static inline void br_mst_uninit(struct net_bridge *br) +{ +} #endif struct nf_br_ops { From 3534e03e0ec2e00908765549828a69df5ebefb91 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 5 Nov 2025 07:59:19 -0800 Subject: [PATCH 0785/1075] selftests/vsock: avoid false-positives when checking dmesg Sometimes VMs will have some intermittent dmesg warnings that are unrelated to vsock. Change the dmesg parsing to filter on strings containing 'vsock' to avoid false positive failures that are unrelated to vsock. The downside is that it is possible for some vsock related warnings to not contain the substring 'vsock', so those will be missed. Fixes: a4a65c6fe08b ("selftests/vsock: add initial vmtest.sh for vsock") Reviewed-by: Simon Horman Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20251105-vsock-vmtest-dmesg-fix-v2-1-1a042a14892c@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index edacebfc1632..8ceeb8a7894f 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -389,9 +389,9 @@ run_test() { local rc host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') - host_warn_cnt_before=$(dmesg --level=warn | wc -l) + host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') name=$(echo "${1}" | awk '{ print $1 }') eval test_"${name}" @@ -403,7 +403,7 @@ run_test() { rc=$KSFT_FAIL fi - host_warn_cnt_after=$(dmesg --level=warn | wc -l) + host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on host" | log_host "${name}" rc=$KSFT_FAIL @@ -415,7 +415,7 @@ run_test() { rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host "${name}" rc=$KSFT_FAIL From a26a6c93edfeee82cb73f55e87d995eea59ddfe8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 5 Nov 2025 15:30:27 -0700 Subject: [PATCH 0786/1075] kbuild: Strip trailing padding bytes from modules.builtin.modinfo After commit d50f21091358 ("kbuild: align modinfo section for Secureboot Authenticode EDK2 compat"), running modules_install with certain versions of kmod (such as 29.1 in Ubuntu Jammy) in certain configurations may fail with: depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix The additional padding bytes to ensure .modinfo is aligned within vmlinux.unstripped are unexpected by kmod, as this section has always just been null-terminated strings. Strip the trailing padding bytes from modules.builtin.modinfo after it has been extracted from vmlinux.unstripped to restore the format that kmod expects while keeping .modinfo aligned within vmlinux.unstripped to avoid regressing the Authenticode calculation fix for EDK2. Cc: stable@vger.kernel.org Fixes: d50f21091358 ("kbuild: align modinfo section for Secureboot Authenticode EDK2 compat") Reported-by: Omar Sandoval Reported-by: Samir M Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/7fef7507-ad64-4e51-9bb8-c9fb6532e51e@linux.ibm.com/ Tested-by: Omar Sandoval Tested-by: Samir M Tested-by: Venkat Rao Bagalkote Reviewed-by: Nicolas Schier Link: https://patch.msgid.link/20251105-kbuild-fix-builtin-modinfo-for-kmod-v1-1-b419d8ad4606@kernel.org Signed-off-by: Nathan Chancellor --- scripts/Makefile.vmlinux | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index ced4379550d7..cd788cac9d91 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -102,11 +102,24 @@ vmlinux: vmlinux.unstripped FORCE # modules.builtin.modinfo # --------------------------------------------------------------------------- +# .modinfo in vmlinux.unstripped is aligned to 8 bytes for compatibility with +# tools that expect vmlinux to have sufficiently aligned sections but the +# additional bytes used for padding .modinfo to satisfy this requirement break +# certain versions of kmod with +# +# depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix +# +# Strip the trailing padding bytes after extracting .modinfo to comply with +# what kmod expects to parse. +quiet_cmd_modules_builtin_modinfo = GEN $@ + cmd_modules_builtin_modinfo = $(cmd_objcopy); \ + sed -i 's/\x00\+$$/\x00/g' $@ + OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary targets += modules.builtin.modinfo modules.builtin.modinfo: vmlinux.unstripped FORCE - $(call if_changed,objcopy) + $(call if_changed,modules_builtin_modinfo) # modules.builtin # --------------------------------------------------------------------------- From eb6e7f520d6efa4d4ebf1671455abe4a681f7a05 Mon Sep 17 00:00:00 2001 From: Samuel Zhang Date: Wed, 5 Nov 2025 03:04:08 +0000 Subject: [PATCH 0787/1075] drm/amdgpu: fix gpu page fault after hibernation on PF passthrough On PF passthrough environment, after hibernate and then resume, coralgemm will cause gpu page fault. Mode1 reset happens during hibernate, but partition mode is not restored on resume, register mmCP_HYP_XCP_CTL and mmCP_PSP_XCP_CTL is not right after resume. When CP access the MQD BO, wrong stride size is used, this will cause out of bound access on the MQD BO, resulting page fault. The fix is to ensure gfx_v9_4_3_switch_compute_partition() is called when resume from a hibernation. KFD resume is called separately during a reset recovery or resume from suspend sequence. Hence it's not required to be called as part of partition switch. Signed-off-by: Samuel Zhang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher (cherry picked from commit 5d1b32cfe4a676fe552416cb5ae847b215463a1a) --- drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 811124ff88a8..f9e2edf5260b 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -407,7 +407,8 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, return -EINVAL; } - if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) + if (adev->kfd.init_complete && !amdgpu_in_reset(adev) && + !adev->in_suspend) flags |= AMDGPU_XCP_OPS_KFD; if (flags & AMDGPU_XCP_OPS_KFD) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 77f9d5b9a556..c90cbe053ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -2292,7 +2292,9 @@ static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); } else { - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, + if (adev->in_suspend) + amdgpu_xcp_restore_partition_mode(adev->xcp_mgr); + else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) r = amdgpu_xcp_switch_partition_mode( From b09cb2996cdf50cd1ab4020e002c95d742c81313 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 4 Nov 2025 13:38:02 -0600 Subject: [PATCH 0788/1075] drm/amd: Fix suspend failure with secure display TA commit c760bcda83571 ("drm/amd: Check whether secure display TA loaded successfully") attempted to fix extra messages, but failed to port the cleanup that was in commit 5c6d52ff4b61e ("drm/amd: Don't try to enable secure display TA multiple times") to prevent multiple tries. Add that to the failure handling path even on a quick failure. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4679 Fixes: c760bcda8357 ("drm/amd: Check whether secure display TA loaded successfully") Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 4104c0a454f6a4d1e0d14895d03c0e7bdd0c8240) --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 8c0e5d03de50..aa7987d0806c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -2355,8 +2355,11 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (!ret && !psp->securedisplay_context.context.resp_status) { psp->securedisplay_context.context.initialized = true; mutex_init(&psp->securedisplay_context.mutex); - } else + } else { + /* don't try again */ + psp->securedisplay_context.context.bin_desc.size_bytes = 0; return ret; + } mutex_lock(&psp->securedisplay_context.mutex); From 570a66b48c22214851949fcd71816fee280aa096 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Mon, 3 Nov 2025 16:21:50 +0530 Subject: [PATCH 0789/1075] drm/amdgpu: Fix wait after reset sequence in S3 For a mode-1 reset done at the end of S3 on PSPv11 dGPUs, only check if TOS is unloaded. Fixes: 32f73741d6ee ("drm/amdgpu: Wait for bootloader after PSPv11 reset") Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/4649 Signed-off-by: Lijo Lazar Acked-by: Alex Deucher Signed-off-by: Alex Deucher (cherry picked from commit 1ad25fd272753db14c5d1cc8c68e20ce01f3f888) --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9 +++++++-- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 26 ++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 61268aa82df4..7333e19291cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2632,9 +2632,14 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - if (amdgpu_acpi_should_gpu_reset(adev)) - return amdgpu_asic_reset(adev); + if (amdgpu_acpi_should_gpu_reset(adev)) { + amdgpu_device_lock_reset_domain(adev->reset_domain); + r = amdgpu_asic_reset(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); + return r; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 64b240b51f1a..a9be7a505026 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -142,13 +142,37 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) return err; } +static int psp_v11_wait_for_tos_unload(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg1, sol_reg2; + int retry_loop; + + /* Wait for the TOS to be unloaded */ + for (retry_loop = 0; retry_loop < 20; retry_loop++) { + sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + usleep_range(1000, 2000); + sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg1 == sol_reg2) + return 0; + } + dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x", + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81)); + + return -ETIME; +} + static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int ret; int retry_loop; + /* For a reset done at the end of S3, only wait for TOS to be unloaded */ + if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev)) + return psp_v11_wait_for_tos_unload(psp); + for (retry_loop = 0; retry_loop < 20; retry_loop++) { /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ From 3c6a743c6961cc2cab453b343bb157d6bbbf8120 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Wed, 5 Nov 2025 10:36:31 +0800 Subject: [PATCH 0790/1075] drm/amd/display: Enable mst when it's detected but yet to be initialized [Why] drm_dp_mst_topology_queue_probe() is used under the assumption that mst is already initialized. If we connect system with SST first then switch to the mst branch during suspend, we will fail probing topology by calling the wrong API since the mst manager is yet to be initialized. [How] At dm_resume(), once it's detected as mst branc connected, check if the mst is initialized already. If not, call dm_helpers_dp_mst_start_top_mgr() instead to initialize mst V2: Adjust the commit msg a bit Fixes: bc068194f548 ("drm/amd/display: Don't write DP_MSTM_CTRL after LT") Cc: Fangzhi Zuo Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Tom Chung Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher (cherry picked from commit 62320fb8d91a0bddc44a228203cfa9bfbb5395bd) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 2fe6520207d6..91c0188a29b2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3563,6 +3563,7 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) /* Do mst topology probing after resuming cached state*/ drm_connector_list_iter_begin(ddev, &iter); drm_for_each_connector_iter(connector, &iter) { + bool init = false; if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) continue; @@ -3572,7 +3573,14 @@ static int dm_resume(struct amdgpu_ip_block *ip_block) aconnector->mst_root) continue; - drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); + scoped_guard(mutex, &aconnector->mst_mgr.lock) { + init = !aconnector->mst_mgr.mst_primary; + } + if (init) + dm_helpers_dp_mst_start_top_mgr(aconnector->dc_link->ctx, + aconnector->dc_link, false); + else + drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); } drm_connector_list_iter_end(&iter); From 3dc8c73365d3ca25c99e7e1a0f493039d7291df5 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Thu, 6 Nov 2025 22:31:14 +0800 Subject: [PATCH 0791/1075] ASoC: codecs: va-macro: fix resource leak in probe error path In the commit referenced by the Fixes tag, clk_hw_get_clk() was added in va_macro_probe() to get the fsgen clock, but forgot to add the corresponding clk_put() in va_macro_remove(). This leads to a clock reference leak when the driver is unloaded. Switch to devm_clk_hw_get_clk() to automatically manage the clock resource. Fixes: 30097967e056 ("ASoC: codecs: va-macro: use fsgen as clock") Suggested-by: Konrad Dybcio Signed-off-by: Haotian Zhang Reviewed-by: Konrad Dybcio Link: https://patch.msgid.link/20251106143114.729-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-va-macro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 2e1b77973a3e..92c177b82a02 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1638,7 +1638,7 @@ static int va_macro_probe(struct platform_device *pdev) if (ret) goto err_clkout; - va->fsgen = clk_hw_get_clk(&va->hw, "fsgen"); + va->fsgen = devm_clk_hw_get_clk(dev, &va->hw, "fsgen"); if (IS_ERR(va->fsgen)) { ret = PTR_ERR(va->fsgen); goto err_clkout; From 4436f484cb437ba28dc58b7f787a6f80a65aa5c3 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 6 Nov 2025 17:16:25 +0100 Subject: [PATCH 0792/1075] gpio: tb10x: Drop unused tb10x_set_bits() function tb10x_set_bits() is not referenced anywhere leading to W=1 warning: gpio-tb10x.c:59:20: error: unused function 'tb10x_set_bits' [-Werror,-Wunused-function] After its removal, tb10x_reg_write() becomes unused as well. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20251106-gpio-of-match-v1-1-50c7115a045e@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tb10x.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpio/gpio-tb10x.c b/drivers/gpio/gpio-tb10x.c index 09a448ce3eec..3c8fd322a713 100644 --- a/drivers/gpio/gpio-tb10x.c +++ b/drivers/gpio/gpio-tb10x.c @@ -50,25 +50,6 @@ static inline u32 tb10x_reg_read(struct tb10x_gpio *gpio, unsigned int offs) return ioread32(gpio->base + offs); } -static inline void tb10x_reg_write(struct tb10x_gpio *gpio, unsigned int offs, - u32 val) -{ - iowrite32(val, gpio->base + offs); -} - -static inline void tb10x_set_bits(struct tb10x_gpio *gpio, unsigned int offs, - u32 mask, u32 val) -{ - u32 r; - - guard(gpio_generic_lock_irqsave)(&gpio->chip); - - r = tb10x_reg_read(gpio, offs); - r = (r & ~mask) | (val & mask); - - tb10x_reg_write(gpio, offs, r); -} - static int tb10x_gpio_to_irq(struct gpio_chip *chip, unsigned offset) { struct tb10x_gpio *tb10x_gpio = gpiochip_get_data(chip); From a9da90e618cd0669a22bcc06a96209db5dd96e9b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Nov 2025 15:41:19 +0100 Subject: [PATCH 0793/1075] wifi: mac80211: reject address change while connecting While connecting, the MAC address can already no longer be changed. The change is already rejected if netif_carrier_ok(), but of course that's not true yet while connecting. Check for auth_data or assoc_data, so the MAC address cannot be changed. Also more comprehensively check that there are no stations on the interface being changed - if any peer station is added it will know about our address already, so we cannot change it. Cc: stable@vger.kernel.org Fixes: 3c06e91b40db ("wifi: mac80211: Support POWERED_ADDR_CHANGE feature") Link: https://patch.msgid.link/20251105154119.f9f6c1df81bb.I9bb3760ede650fb96588be0d09a5a7bdec21b217@changeid Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a7873832d4fa..0ca55b9655a7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -223,6 +223,10 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata if (netif_carrier_ok(sdata->dev)) return -EBUSY; + /* if any stations are set known (so they know this vif too), reject */ + if (sta_info_get_by_idx(sdata, 0)) + return -EBUSY; + /* First check no ROC work is happening on this iface */ list_for_each_entry(roc, &local->roc_list, list) { if (roc->sdata != sdata) @@ -242,12 +246,16 @@ static int ieee80211_can_powered_addr_change(struct ieee80211_sub_if_data *sdata ret = -EBUSY; } + /* + * More interface types could be added here but changing the + * address while powered makes the most sense in client modes. + */ switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - /* More interface types could be added here but changing the - * address while powered makes the most sense in client modes. - */ + /* refuse while connecting */ + if (sdata->u.mgd.auth_data || sdata->u.mgd.assoc_data) + return -EBUSY; break; default: ret = -EOPNOTSUPP; From f2a12cc3b97f062186568a7b94ddb7aa2ef68140 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 31 Oct 2025 13:47:39 +0800 Subject: [PATCH 0794/1075] erofs: avoid infinite loop due to incomplete zstd-compressed data Currently, the decompression logic incorrectly spins if compressed data is truncated in crafted (deliberately corrupted) images. Fixes: 7c35de4df105 ("erofs: Zstandard compression support") Reported-by: Robert Morris Closes: https://lore.kernel.org/r/50958.1761605413@localhost Signed-off-by: Gao Xiang Reviewed-by: Chunhai Guo Reviewed-by: Chao Yu --- fs/erofs/decompressor_zstd.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c index b4bfe14229f9..e38d93bb2104 100644 --- a/fs/erofs/decompressor_zstd.c +++ b/fs/erofs/decompressor_zstd.c @@ -172,7 +172,6 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, dctx.bounce = strm->bounce; do { - dctx.avail_out = out_buf.size - out_buf.pos; dctx.inbuf_sz = in_buf.size; dctx.inbuf_pos = in_buf.pos; err = z_erofs_stream_switch_bufs(&dctx, &out_buf.dst, @@ -188,14 +187,18 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq, in_buf.pos = dctx.inbuf_pos; zerr = zstd_decompress_stream(stream, &out_buf, &in_buf); - if (zstd_is_error(zerr) || (!zerr && rq->outputsize)) { + dctx.avail_out = out_buf.size - out_buf.pos; + if (zstd_is_error(zerr) || + ((rq->outputsize + dctx.avail_out) && (!zerr || (zerr > 0 && + !(rq->inputsize + in_buf.size - in_buf.pos))))) { erofs_err(sb, "failed to decompress in[%u] out[%u]: %s", rq->inputsize, rq->outputsize, - zerr ? zstd_get_error_name(zerr) : "unexpected end of stream"); + zstd_is_error(zerr) ? zstd_get_error_name(zerr) : + "unexpected end of stream"); err = -EFSCORRUPTED; break; } - } while (rq->outputsize || out_buf.pos < out_buf.size); + } while (rq->outputsize + dctx.avail_out); if (dctx.kout) kunmap_local(dctx.kout); From 10d9dda426d684e98b17161f02f77894c6de9b60 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 7 Nov 2025 01:52:15 +0900 Subject: [PATCH 0795/1075] tracing: tprobe-events: Fix to register tracepoint correctly Since __tracepoint_user_init() calls tracepoint_user_register() without initializing tuser->tpoint with given tracpoint, it does not register tracepoint stub function as callback correctly, and tprobe does not work. Initializing tuser->tpoint correctly before tracepoint_user_register() so that it sets up tracepoint callback. I confirmed below example works fine again. echo "t sched_switch preempt prev_pid=prev->pid next_pid=next->pid" > /sys/kernel/tracing/dynamic_events echo 1 > /sys/kernel/tracing/events/tracepoints/sched_switch/enable cat /sys/kernel/tracing/trace_pipe Link: https://lore.kernel.org/all/176244793514.155515.6466348656998627773.stgit@devnote2/ Fixes: 2867495dea86 ("tracing: tprobe-events: Register tracepoint when enable tprobe event") Reported-by: Beau Belgrave Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Tested-by: Beau Belgrave Reviewed-by: Beau Belgrave --- kernel/trace/trace_fprobe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index ad9d6347b5fa..fd1b108ab639 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -106,13 +106,14 @@ static struct tracepoint_user *__tracepoint_user_init(const char *name, struct t if (!tuser->name) return NULL; + /* Register tracepoint if it is loaded. */ if (tpoint) { + tuser->tpoint = tpoint; ret = tracepoint_user_register(tuser); if (ret) return ERR_PTR(ret); } - tuser->tpoint = tpoint; tuser->refcount = 1; INIT_LIST_HEAD(&tuser->list); list_add(&tuser->list, &tracepoint_user_list); From c91afa7610235f89a5e8f5686aac23892ab227ed Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 7 Nov 2025 01:52:24 +0900 Subject: [PATCH 0796/1075] tracing: tprobe-events: Fix to put tracepoint_user when disable the tprobe __unregister_trace_fprobe() checks tf->tuser to put it when removing tprobe. However, disable_trace_fprobe() does not use it and only calls unregister_fprobe(). Thus it forgets to disable tracepoint_user. If the trace_fprobe has tuser, put it for unregistering the tracepoint callbacks when disabling tprobe correctly. Link: https://lore.kernel.org/all/176244794466.155515.3971904050506100243.stgit@devnote2/ Fixes: 2867495dea86 ("tracing: tprobe-events: Register tracepoint when enable tprobe event") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Tested-by: Beau Belgrave Reviewed-by: Beau Belgrave --- kernel/trace/trace_fprobe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c index fd1b108ab639..8001dbf16891 100644 --- a/kernel/trace/trace_fprobe.c +++ b/kernel/trace/trace_fprobe.c @@ -1514,6 +1514,10 @@ static int disable_trace_fprobe(struct trace_event_call *call, if (!trace_probe_is_enabled(tp)) { list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { unregister_fprobe(&tf->fp); + if (tf->tuser) { + tracepoint_user_put(tf->tuser); + tf->tuser = NULL; + } } } From 74d4432421a3e2669fbccc08c0f4fc2980bf0e39 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 5 Nov 2025 21:29:08 +0200 Subject: [PATCH 0797/1075] docs: netlink: Couple of intro-specs documentation fixes Fix typo "handul" to "handful" and remove outdated limitation stating only generic netlink is supported (we have netlink-raw). Reviewed-by: Carolina Jubran Signed-off-by: Gal Pressman Link: https://patch.msgid.link/20251105192908.686458-1-gal@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/intro-specs.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/userspace-api/netlink/intro-specs.rst b/Documentation/userspace-api/netlink/intro-specs.rst index a4435ae4628d..e5ebc617754a 100644 --- a/Documentation/userspace-api/netlink/intro-specs.rst +++ b/Documentation/userspace-api/netlink/intro-specs.rst @@ -13,10 +13,10 @@ Simple CLI Kernel comes with a simple CLI tool which should be useful when developing Netlink related code. The tool is implemented in Python and can use a YAML specification to issue Netlink requests -to the kernel. Only Generic Netlink is supported. +to the kernel. The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts -a handul of arguments, the most important ones are: +a handful of arguments, the most important ones are: - ``--spec`` - point to the spec file - ``--do $name`` / ``--dump $name`` - issue request ``$name`` From aa997d2d2a0b2e76f4df0f1f12829f02acb4fb6b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 16 Oct 2025 13:28:48 -0400 Subject: [PATCH 0798/1075] ring-buffer: Do not warn in ring_buffer_map_get_reader() when reader catches up The function ring_buffer_map_get_reader() is a bit more strict than the other get reader functions, and except for certain situations the rb_get_reader_page() should not return NULL. If it does, it triggers a warning. This warning was triggering but after looking at why, it was because another acceptable situation was happening and it wasn't checked for. If the reader catches up to the writer and there's still data to be read on the reader page, then the rb_get_reader_page() will return NULL as there's no new page to get. In this situation, the reader page should not be updated and no warning should trigger. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Reported-by: syzbot+92a3745cea5ec6360309@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/690babec.050a0220.baf87.0064.GAE@google.com/ Link: https://lore.kernel.org/20251016132848.1b11bb37@gandalf.local.home Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 1244d2c5c384..afcd3747264d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -7344,6 +7344,10 @@ int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu) goto out; } + /* Did the reader catch up with the writer? */ + if (cpu_buffer->reader_page == cpu_buffer->commit_page) + goto out; + reader = rb_get_reader_page(cpu_buffer); if (WARN_ON(!reader)) goto out; From 80f0d631dcc76ee1b7755bfca1d8417d91d71414 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Thu, 6 Nov 2025 12:01:32 +0000 Subject: [PATCH 0799/1075] tracing: Fix memory leaks in create_field_var() The function create_field_var() allocates memory for 'val' through create_hist_field() inside parse_atom(), and for 'var' through create_var(), which in turn allocates var->type and var->var.name internally. Simply calling kfree() to release these structures will result in memory leaks. Use destroy_hist_field() to properly free 'val', and explicitly release the memory of var->type and var->var.name before freeing 'var' itself. Link: https://patch.msgid.link/20251106120132.3639920-1-zilin@seu.edu.cn Fixes: 02205a6752f22 ("tracing: Add support for 'field variables'") Signed-off-by: Zilin Guan Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1d536219b624..6bfaf1210dd2 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3272,14 +3272,16 @@ static struct field_var *create_field_var(struct hist_trigger_data *hist_data, var = create_var(hist_data, file, field_name, val->size, val->type); if (IS_ERR(var)) { hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); - kfree(val); + destroy_hist_field(val, 0); ret = PTR_ERR(var); goto err; } field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); if (!field_var) { - kfree(val); + destroy_hist_field(val, 0); + kfree_const(var->type); + kfree(var->var.name); kfree(var); ret = -ENOMEM; goto err; From 32b415a9dc2c212e809b7ebc2b14bc3fbda2b9af Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Tue, 21 Oct 2025 14:01:28 -0500 Subject: [PATCH 0800/1075] drm/vmwgfx: Validate command header size against SVGA_CMD_MAX_DATASIZE This data originates from userspace and is used in buffer offset calculations which could potentially overflow causing an out-of-bounds access. Fixes: 8ce75f8ab904 ("drm/vmwgfx: Update device includes for DX device functionality") Reported-by: Rohit Keshri Signed-off-by: Ian Forbes Reviewed-by: Maaz Mombasawala Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20251021190128.13014-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index d539f25b5fbe..3057f8baa7d2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3668,6 +3668,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv, cmd_id = header->id; + if (header->size > SVGA_CMD_MAX_DATASIZE) { + VMW_DEBUG_USER("SVGA3D command: %d is too big.\n", + cmd_id + SVGA_3D_CMD_BASE); + return -E2BIG; + } *size = header->size + sizeof(SVGA3dCmdHeader); cmd_id -= SVGA_3D_CMD_BASE; From c1962742ffff7e245f935903a4658eb6f94f6058 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Thu, 30 Oct 2025 14:36:40 -0500 Subject: [PATCH 0801/1075] drm/vmwgfx: Use kref in vmw_bo_dirty Rather than using an ad hoc reference count use kref which is atomic and has underflow warnings. Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20251030193640.153697-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 7de20e56082c..fd4e76486f2d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -32,22 +32,22 @@ enum vmw_bo_dirty_method { /** * struct vmw_bo_dirty - Dirty information for buffer objects + * @ref_count: Reference count for this structure. Must be first member! * @start: First currently dirty bit * @end: Last currently dirty bit + 1 * @method: The currently used dirty method * @change_count: Number of consecutive method change triggers - * @ref_count: Reference count for this structure * @bitmap_size: The size of the bitmap in bits. Typically equal to the * nuber of pages in the bo. * @bitmap: A bitmap where each bit represents a page. A set bit means a * dirty page. */ struct vmw_bo_dirty { + struct kref ref_count; unsigned long start; unsigned long end; enum vmw_bo_dirty_method method; unsigned int change_count; - unsigned int ref_count; unsigned long bitmap_size; unsigned long bitmap[]; }; @@ -221,7 +221,7 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo) int ret; if (dirty) { - dirty->ref_count++; + kref_get(&dirty->ref_count); return 0; } @@ -235,7 +235,7 @@ int vmw_bo_dirty_add(struct vmw_bo *vbo) dirty->bitmap_size = num_pages; dirty->start = dirty->bitmap_size; dirty->end = 0; - dirty->ref_count = 1; + kref_init(&dirty->ref_count); if (num_pages < PAGE_SIZE / sizeof(pte_t)) { dirty->method = VMW_BO_DIRTY_PAGETABLE; } else { @@ -274,10 +274,8 @@ void vmw_bo_dirty_release(struct vmw_bo *vbo) { struct vmw_bo_dirty *dirty = vbo->dirty; - if (dirty && --dirty->ref_count == 0) { - kvfree(dirty); + if (dirty && kref_put(&dirty->ref_count, (void *)kvfree)) vbo->dirty = NULL; - } } /** From eef295a8508202e750e4f103a97447f3c9d5e3d0 Mon Sep 17 00:00:00 2001 From: Ian Forbes Date: Mon, 3 Nov 2025 14:19:20 -0600 Subject: [PATCH 0802/1075] drm/vmwgfx: Restore Guest-Backed only cursor plane support The referenced fixes commit broke the cursor plane for configurations which have Guest-Backed surfaces but no cursor MOB support. Fixes: 965544150d1c ("drm/vmwgfx: Refactor cursor handling") Signed-off-by: Ian Forbes Signed-off-by: Zack Rusin Link: https://patch.msgid.link/20251103201920.381503-1-ian.forbes@broadcom.com --- drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c | 16 +++++++++++++++- drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c index 718832b08d96..c46f17ba7236 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.c @@ -100,8 +100,10 @@ vmw_cursor_update_type(struct vmw_private *vmw, struct vmw_plane_state *vps) if (vmw->has_mob) { if ((vmw->capabilities2 & SVGA_CAP2_CURSOR_MOB) != 0) return VMW_CURSOR_UPDATE_MOB; + else + return VMW_CURSOR_UPDATE_GB_ONLY; } - + drm_warn_once(&vmw->drm, "Unknown Cursor Type!\n"); return VMW_CURSOR_UPDATE_NONE; } @@ -139,6 +141,7 @@ static u32 vmw_cursor_mob_size(enum vmw_cursor_update_type update_type, { switch (update_type) { case VMW_CURSOR_UPDATE_LEGACY: + case VMW_CURSOR_UPDATE_GB_ONLY: case VMW_CURSOR_UPDATE_NONE: return 0; case VMW_CURSOR_UPDATE_MOB: @@ -623,6 +626,7 @@ int vmw_cursor_plane_prepare_fb(struct drm_plane *plane, if (!surface || vps->cursor.legacy.id == surface->snooper.id) vps->cursor.update_type = VMW_CURSOR_UPDATE_NONE; break; + case VMW_CURSOR_UPDATE_GB_ONLY: case VMW_CURSOR_UPDATE_MOB: { bo = vmw_user_object_buffer(&vps->uo); if (bo) { @@ -737,6 +741,7 @@ void vmw_cursor_plane_atomic_update(struct drm_plane *plane, struct drm_atomic_state *state) { + struct vmw_bo *bo; struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct drm_plane_state *old_state = @@ -762,6 +767,15 @@ vmw_cursor_plane_atomic_update(struct drm_plane *plane, case VMW_CURSOR_UPDATE_MOB: vmw_cursor_update_mob(dev_priv, vps); break; + case VMW_CURSOR_UPDATE_GB_ONLY: + bo = vmw_user_object_buffer(&vps->uo); + if (bo) + vmw_send_define_cursor_cmd(dev_priv, bo->map.virtual, + vps->base.crtc_w, + vps->base.crtc_h, + vps->base.hotspot_x, + vps->base.hotspot_y); + break; case VMW_CURSOR_UPDATE_NONE: /* do nothing */ break; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h index 40694925a70e..0c2cc0699b0d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cursor_plane.h @@ -33,6 +33,7 @@ static const u32 __maybe_unused vmw_cursor_plane_formats[] = { enum vmw_cursor_update_type { VMW_CURSOR_UPDATE_NONE = 0, VMW_CURSOR_UPDATE_LEGACY, + VMW_CURSOR_UPDATE_GB_ONLY, VMW_CURSOR_UPDATE_MOB, }; From 29528c8e643bb0c54da01237a35010c6438423d2 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 7 Nov 2025 13:49:59 +0800 Subject: [PATCH 0803/1075] ASoC: tas2781: fix getting the wrong device number The return value of device_property_read_u32_array used for getting the property is the status instead of the number of the property. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20251107054959.950-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- sound/soc/codecs/tas2781-i2c.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index ba880b5de7e8..8f37aa00e62e 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -1957,7 +1957,8 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) { struct i2c_client *client = (struct i2c_client *)tas_priv->client; unsigned int dev_addrs[TASDEVICE_MAX_CHANNELS]; - int i, ndev = 0; + int ndev = 0; + int i, rc; if (tas_priv->isacpi) { ndev = device_property_read_u32_array(&client->dev, @@ -1968,8 +1969,12 @@ static void tasdevice_parse_dt(struct tasdevice_priv *tas_priv) } else { ndev = (ndev < ARRAY_SIZE(dev_addrs)) ? ndev : ARRAY_SIZE(dev_addrs); - ndev = device_property_read_u32_array(&client->dev, + rc = device_property_read_u32_array(&client->dev, "ti,audio-slots", dev_addrs, ndev); + if (rc != 0) { + ndev = 1; + dev_addrs[0] = client->addr; + } } tas_priv->irq = From 939edfaa10f1d22e6af6a84bf4bd96dc49c67302 Mon Sep 17 00:00:00 2001 From: Alvaro Gamez Machado Date: Thu, 6 Nov 2025 14:45:35 +0100 Subject: [PATCH 0804/1075] spi: xilinx: increase number of retries before declaring stall SPI devices using a (relative) slow frequency need a larger time. For instance, microblaze running at 83.25MHz and performing a 3 bytes transaction using a 10MHz/16 = 625kHz needed this stall value increased to at least 20. The SPI device is quite slow, but also is the microblaze, so set this value to 32 to give it even more margin. Signed-off-by: Alvaro Gamez Machado Reviewed-by: Ricardo Ribalda Link: https://patch.msgid.link/20251106134545.31942-1-alvaro.gamez@hazent.com Signed-off-by: Mark Brown --- drivers/spi/spi-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-xilinx.c b/drivers/spi/spi-xilinx.c index d59cc8a18484..c86dc56f38b4 100644 --- a/drivers/spi/spi-xilinx.c +++ b/drivers/spi/spi-xilinx.c @@ -300,7 +300,7 @@ static int xilinx_spi_txrx_bufs(struct spi_device *spi, struct spi_transfer *t) /* Read out all the data from the Rx FIFO */ rx_words = n_words; - stalled = 10; + stalled = 32; while (rx_words) { if (rx_words == n_words && !(stalled--) && !(sr & XSPI_SR_TX_EMPTY_MASK) && From 95af8f4fdce8349a5fe75264007f1af2aa1082ea Mon Sep 17 00:00:00 2001 From: Balasubramani Vivekanandan Date: Mon, 3 Nov 2025 18:01:47 +0530 Subject: [PATCH 0805/1075] drm/xe/guc: Synchronize Dead CT worker with unbind Cancel and wait for any Dead CT worker to complete before continuing with device unbinding. Else the worker will end up using resources freed by the undind operation. Cc: Zhanjun Dong Fixes: d2c5a5a926f4 ("drm/xe/guc: Dead CT helper") Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Stuart Summers Link: https://patch.msgid.link/20251103123144.3231829-6-balasubramani.vivekanandan@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 492671339114e376aaa38626d637a2751cdef263) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_ct.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 18f6327bf552..283d846c3512 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -200,6 +200,9 @@ static void guc_ct_fini(struct drm_device *drm, void *arg) { struct xe_guc_ct *ct = arg; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + cancel_work_sync(&ct->dead.worker); +#endif ct_exit_safe_mode(ct); destroy_workqueue(ct->g2h_wq); xa_destroy(&ct->fence_lookup); From 9cd27eec872f0b95dcdd811edc39d2d32e4158c8 Mon Sep 17 00:00:00 2001 From: Tejas Upadhyay Date: Tue, 7 Oct 2025 15:32:08 +0530 Subject: [PATCH 0806/1075] drm/xe: Move declarations under conditional branch The xe_device_shutdown() function was needing a few declarations that were only required under a specific condition. This change moves those declarations to be within that conditional branch to avoid unnecessary declarations. Reviewed-by: Nitin Gote Link: https://patchwork.freedesktop.org/patch/msgid/20251007100208.1407021-1-tejas.upadhyay@intel.com Signed-off-by: Tejas Upadhyay (cherry picked from commit 15b3036045188f4da4ca62b2ed01b0f160252e9b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 34d33965eac2..132530e5a3db 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -983,12 +983,12 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { - struct xe_gt *gt; - u8 id; - drm_dbg(&xe->drm, "Shutting down device\n"); if (xe_driver_flr_disabled(xe)) { + struct xe_gt *gt; + u8 id; + xe_display_pm_shutdown(xe); xe_irq_suspend(xe); From b11a020d914c3b7628f56a9ea476a5b03679489b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jouni=20H=C3=B6gander?= Date: Fri, 31 Oct 2025 14:23:11 +0200 Subject: [PATCH 0807/1075] drm/xe: Do clean shutdown also when using flr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently Xe driver is triggering flr without any clean-up on shutdown. This is causing random warnings from pending related works as the underlying hardware is reset in the middle of their execution. Fix this by performing clean shutdown also when using flr. Fixes: 501d799a47e2 ("drm/xe: Wire up device shutdown handler") Cc: Maarten Lankhorst Signed-off-by: Jouni Högander Reviewed-by: Maarten Lankhorst Link: https://patch.msgid.link/20251031122312.1836534-1-jouni.hogander@intel.com Signed-off-by: Maarten Lankhorst (cherry picked from commit a4ff26b7c8ef38e4dd34f77cbcd73576fdde6dd4) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_device.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 132530e5a3db..456899238377 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -983,21 +983,21 @@ void xe_device_remove(struct xe_device *xe) void xe_device_shutdown(struct xe_device *xe) { + struct xe_gt *gt; + u8 id; + drm_dbg(&xe->drm, "Shutting down device\n"); - if (xe_driver_flr_disabled(xe)) { - struct xe_gt *gt; - u8 id; + xe_display_pm_shutdown(xe); - xe_display_pm_shutdown(xe); + xe_irq_suspend(xe); - xe_irq_suspend(xe); + for_each_gt(gt, xe, id) + xe_gt_shutdown(gt); - for_each_gt(gt, xe, id) - xe_gt_shutdown(gt); + xe_display_pm_shutdown_late(xe); - xe_display_pm_shutdown_late(xe); - } else { + if (!xe_driver_flr_disabled(xe)) { /* BOOM! */ __xe_driver_flr(xe); } From d23550efc6800841b4d1639784afaebdea946ae0 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Thu, 6 Nov 2025 12:28:54 -0600 Subject: [PATCH 0808/1075] x86/microcode/AMD: Add more known models to entry sign checking Two Zen5 systems are missing from need_sha_check(). Add them. Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Signed-off-by: Mario Limonciello (AMD) Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://patch.msgid.link/20251106182904.4143757-1-superm1@kernel.org --- arch/x86/kernel/cpu/microcode/amd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index b7c797dc94f4..dc82153009da 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -220,10 +220,12 @@ static bool need_sha_check(u32 cur_rev) case 0xaa001: return cur_rev <= 0xaa00116; break; case 0xaa002: return cur_rev <= 0xaa00218; break; case 0xb0021: return cur_rev <= 0xb002146; break; + case 0xb0081: return cur_rev <= 0xb008111; break; case 0xb1010: return cur_rev <= 0xb101046; break; case 0xb2040: return cur_rev <= 0xb204031; break; case 0xb4040: return cur_rev <= 0xb404031; break; case 0xb6000: return cur_rev <= 0xb600031; break; + case 0xb6080: return cur_rev <= 0xb608031; break; case 0xb7000: return cur_rev <= 0xb700031; break; default: break; } From 0995c2fc39b0f998d40f5d276f67ae22fc1c37c3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 31 Oct 2025 16:40:45 -0700 Subject: [PATCH 0809/1075] drm/xe: Enforce correct user fence signaling order using MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent application hangs caused by out-of-order fence signaling when user fences are attached. Use drm_syncobj (via dma-fence-chain) to guarantee that each user fence signals in order, regardless of the signaling order of the attached fences. Ensure user fence writebacks to user space occur in the correct sequence. v7: - Skip drm_syncbj create of error (CI) Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Signed-off-by: Matthew Brost Reviewed-by: Thomas Hellström Link: https://patch.msgid.link/20251031234050.3043507-2-matthew.brost@intel.com (cherry picked from commit adda4e855ab6409a3edaa585293f1f2069ab7299) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_exec.c | 3 +- drivers/gpu/drm/xe/xe_exec_queue.c | 14 ++++++++ drivers/gpu/drm/xe/xe_exec_queue_types.h | 7 ++++ drivers/gpu/drm/xe/xe_oa.c | 45 ++++++++++++++++-------- drivers/gpu/drm/xe/xe_oa_types.h | 8 +++++ drivers/gpu/drm/xe/xe_sync.c | 17 +++++++-- drivers/gpu/drm/xe/xe_sync.h | 3 ++ drivers/gpu/drm/xe/xe_sync_types.h | 3 ++ drivers/gpu/drm/xe/xe_vm.c | 4 +++ 9 files changed, 86 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7715e74bb945..a8ab363a8046 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -165,7 +165,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file) for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | + &syncs_user[num_syncs], NULL, 0, + SYNC_PARSE_FLAG_EXEC | (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0)); if (err) diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 37b2b93b73d6..cb5f204c08ed 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "xe_dep_scheduler.h" @@ -324,6 +325,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, } xe_vm_put(migrate_vm); + if (!IS_ERR(q)) { + int err = drm_syncobj_create(&q->ufence_syncobj, + DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (err) { + xe_exec_queue_put(q); + return ERR_PTR(err); + } + } + return q; } ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); @@ -333,6 +344,9 @@ void xe_exec_queue_destroy(struct kref *ref) struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); struct xe_exec_queue *eq, *next; + if (q->ufence_syncobj) + drm_syncobj_put(q->ufence_syncobj); + if (xe_exec_queue_uses_pxp(q)) xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q); diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 27b76cf9da89..df1c69dc81f1 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -15,6 +15,7 @@ #include "xe_hw_fence_types.h" #include "xe_lrc_types.h" +struct drm_syncobj; struct xe_execlist_exec_queue; struct xe_gt; struct xe_guc_exec_queue; @@ -155,6 +156,12 @@ struct xe_exec_queue { struct list_head link; } pxp; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @ops: submission backend exec queue operations */ const struct xe_exec_queue_ops *ops; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a4894eb0d7f3..125698a9ecf1 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -1389,7 +1390,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro return 0; } -static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +static int xe_oa_parse_syncs(struct xe_oa *oa, + struct xe_oa_stream *stream, + struct xe_oa_open_param *param) { int ret, num_syncs, num_ufence = 0; @@ -1409,7 +1412,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { ret = xe_sync_entry_parse(oa->xe, param->xef, ¶m->syncs[num_syncs], - ¶m->syncs_user[num_syncs], 0); + ¶m->syncs_user[num_syncs], + stream->ufence_syncobj, + ++stream->ufence_timeline_value, 0); if (ret) goto err_syncs; @@ -1539,7 +1544,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return -ENODEV; param.xef = stream->xef; - err = xe_oa_parse_syncs(stream->oa, ¶m); + err = xe_oa_parse_syncs(stream->oa, stream, ¶m); if (err) goto err_config_put; @@ -1635,6 +1640,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream) if (stream->exec_q) xe_exec_queue_put(stream->exec_q); + drm_syncobj_put(stream->ufence_syncobj); kfree(stream); } @@ -1826,6 +1832,7 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, struct xe_oa_open_param *param) { struct xe_oa_stream *stream; + struct drm_syncobj *ufence_syncobj; int stream_fd; int ret; @@ -1836,17 +1843,31 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, goto exit; } + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, + NULL); + if (ret) + goto exit; + stream = kzalloc(sizeof(*stream), GFP_KERNEL); if (!stream) { ret = -ENOMEM; - goto exit; + goto err_syncobj; } - + stream->ufence_syncobj = ufence_syncobj; stream->oa = oa; - ret = xe_oa_stream_init(stream, param); + + ret = xe_oa_parse_syncs(oa, stream, param); if (ret) goto err_free; + ret = xe_oa_stream_init(stream, param); + if (ret) { + while (param->num_syncs--) + xe_sync_entry_cleanup(¶m->syncs[param->num_syncs]); + kfree(param->syncs); + goto err_free; + } + if (!param->disabled) { ret = xe_oa_enable_locked(stream); if (ret) @@ -1870,6 +1891,8 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, xe_oa_stream_destroy(stream); err_free: kfree(stream); +err_syncobj: + drm_syncobj_put(ufence_syncobj); exit: return ret; } @@ -2083,22 +2106,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - ret = xe_oa_parse_syncs(oa, ¶m); - if (ret) - goto err_exec_q; - mutex_lock(¶m.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, ¶m); mutex_unlock(¶m.hwe->gt->oa.gt_lock); if (ret < 0) - goto err_sync_cleanup; + goto err_exec_q; return ret; -err_sync_cleanup: - while (param.num_syncs--) - xe_sync_entry_cleanup(¶m.syncs[param.num_syncs]); - kfree(param.syncs); err_exec_q: if (param.exec_q) xe_exec_queue_put(param.exec_q); diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 2628f78c4e8d..daf701b5d48b 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,6 +15,8 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" +struct drm_syncobj; + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { @@ -248,6 +250,12 @@ struct xe_oa_stream { /** @xef: xe_file with which the stream was opened */ struct xe_file *xef; + /** @ufence_syncobj: User fence syncobj */ + struct drm_syncobj *ufence_syncobj; + + /** @ufence_timeline_value: User fence timeline value */ + u64 ufence_timeline_value; + /** @last_fence: fence to use in stream destroy when needed */ struct dma_fence *last_fence; diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 82872a51f098..d48ab7b32ca5 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags) { struct drm_xe_sync sync_in; @@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, if (exec) { sync->addr = sync_in.addr; } else { + sync->ufence_timeline_value = ufence_timeline_value; sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) return PTR_ERR(sync->ufence); + sync->ufence_chain_fence = dma_fence_chain_alloc(); + if (!sync->ufence_chain_fence) + return -ENOMEM; + sync->ufence_syncobj = ufence_syncobj; } break; @@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence) } else if (sync->ufence) { int err; - dma_fence_get(fence); + drm_syncobj_add_point(sync->ufence_syncobj, + sync->ufence_chain_fence, + fence, sync->ufence_timeline_value); + sync->ufence_chain_fence = NULL; + + fence = drm_syncobj_fence_get(sync->ufence_syncobj); user_fence_get(sync->ufence); err = dma_fence_add_callback(fence, &sync->ufence->cb, user_fence_cb); @@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) drm_syncobj_put(sync->syncobj); dma_fence_put(sync->fence); dma_fence_chain_free(sync->chain_fence); - if (sync->ufence) + dma_fence_chain_free(sync->ufence_chain_fence); + if (!IS_ERR_OR_NULL(sync->ufence)) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h index 256ffc1e54dc..51f2d803e977 100644 --- a/drivers/gpu/drm/xe/xe_sync.h +++ b/drivers/gpu/drm/xe/xe_sync.h @@ -8,6 +8,7 @@ #include "xe_sync_types.h" +struct drm_syncobj; struct xe_device; struct xe_exec_queue; struct xe_file; @@ -21,6 +22,8 @@ struct xe_vm; int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, struct xe_sync_entry *sync, struct drm_xe_sync __user *sync_user, + struct drm_syncobj *ufence_syncobj, + u64 ufence_timeline_value, unsigned int flags); int xe_sync_entry_add_deps(struct xe_sync_entry *sync, struct xe_sched_job *job); diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h index 30ac3f51993b..b88f1833e28c 100644 --- a/drivers/gpu/drm/xe/xe_sync_types.h +++ b/drivers/gpu/drm/xe/xe_sync_types.h @@ -18,9 +18,12 @@ struct xe_sync_entry { struct drm_syncobj *syncobj; struct dma_fence *fence; struct dma_fence_chain *chain_fence; + struct dma_fence_chain *ufence_chain_fence; + struct drm_syncobj *ufence_syncobj; struct xe_user_fence *ufence; u64 addr; u64 timeline_value; + u64 ufence_timeline_value; u32 type; u32 flags; }; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 63c65e3d207b..ccb09ef4ec9e 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3606,8 +3606,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) syncs_user = u64_to_user_ptr(args->syncs); for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { + struct xe_exec_queue *__q = q ?: vm->q[0]; + err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], &syncs_user[num_syncs], + __q->ufence_syncobj, + ++__q->ufence_timeline_value, (xe_vm_in_lr_mode(vm) ? SYNC_PARSE_FLAG_LR_MODE : 0) | (!args->num_binds ? From 53afec2c8fb2a562222948cb1c2aac48598578c9 Mon Sep 17 00:00:00 2001 From: Zhang Chujun Date: Thu, 6 Nov 2025 11:10:40 +0800 Subject: [PATCH 0810/1075] tracing/tools: Fix incorrcet short option in usage text for --threads The help message incorrectly listed '-t' as the short option for --threads, but the actual getopt_long configuration uses '-e'. This mismatch can confuse users and lead to incorrect command-line usage. This patch updates the usage string to correctly show: "-e, --threads NRTHR" to match the implementation. Note: checkpatch.pl reports a false-positive spelling warning on 'Run', which is intentional. Link: https://patch.msgid.link/20251106031040.1869-1-zhangchujun@cmss.chinamobile.com Signed-off-by: Zhang Chujun Signed-off-by: Steven Rostedt (Google) --- tools/tracing/latency/latency-collector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index cf263fe9deaf..ef97916e3873 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1725,7 +1725,7 @@ static void show_usage(void) "-n, --notrace\t\tIf latency is detected, do not print out the content of\n" "\t\t\tthe trace file to standard output\n\n" -"-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" +"-e, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" "\t\t\t%ld ms, before reading the trace file. The\n" From 535fdfc5a228524552ee8810c9175e877e127c27 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 6 Nov 2025 15:52:13 +0000 Subject: [PATCH 0811/1075] arm64: Use load LSE atomics for the non-return per-CPU atomic operations The non-return per-CPU this_cpu_*() atomic operations are implemented as STADD/STCLR/STSET when FEAT_LSE is available. On many microarchitecture implementations, these instructions tend to be executed "far" in the interconnect or memory subsystem (unless the data is already in the L1 cache). This is in general more efficient when there is contention as it avoids bouncing cache lines between CPUs. The load atomics (e.g. LDADD without XZR as destination), OTOH, tend to be executed "near" with the data loaded into the L1 cache. STADD executed back to back as in srcu_read_{lock,unlock}*() incur an additional overhead due to the default posting behaviour on several CPU implementations. Since the per-CPU atomics are unlikely to be used concurrently on the same memory location, encourage the hardware to to execute them "near" by issuing load atomics - LDADD/LDCLR/LDSET - with the destination register unused (but not XZR). Signed-off-by: Catalin Marinas Link: https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop Reported-by: Paul E. McKenney Tested-by: Paul E. McKenney Cc: Will Deacon Reviewed-by: Palmer Dabbelt [will: Add comment and link to the discussion thread] Signed-off-by: Will Deacon --- arch/arm64/include/asm/percpu.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 9abcc8ef3087..b57b2bb00967 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -77,7 +77,7 @@ __percpu_##name##_case_##sz(void *ptr, unsigned long val) \ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ " cbnz %w[loop], 1b", \ /* LSE atomics */ \ - #op_lse "\t%" #w "[val], %[ptr]\n" \ + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ __nops(3)) \ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ [ptr] "+Q"(*(u##sz *)ptr) \ @@ -124,9 +124,16 @@ PERCPU_RW_OPS(8) PERCPU_RW_OPS(16) PERCPU_RW_OPS(32) PERCPU_RW_OPS(64) -PERCPU_OP(add, add, stadd) -PERCPU_OP(andnot, bic, stclr) -PERCPU_OP(or, orr, stset) + +/* + * Use value-returning atomics for CPU-local ops as they are more likely + * to execute "near" to the CPU (e.g. in L1$). + * + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop + */ +PERCPU_OP(add, add, ldadd) +PERCPU_OP(andnot, bic, ldclr) +PERCPU_OP(or, orr, ldset) PERCPU_RET_OP(add, add, ldadd) #undef PERCPU_RW_OPS From eeb8c19896952e18fb538ec76e603884070a6c6a Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 31 Oct 2025 11:11:37 +0000 Subject: [PATCH 0812/1075] Revert "ACPI: Suppress misleading SPCR console message when SPCR table is absent" This reverts commit bad3fa2fb9206f4dcec6ddef094ec2fbf6e8dcb2. Commit bad3fa2fb920 ("ACPI: Suppress misleading SPCR console message when SPCR table is absent") mistakenly assumes acpi_parse_spcr() returning 0 to indicate a failure to parse SPCR. While addressing the resultant incorrect logging it was deemed that dropping the message is a better approach as it is not particularly useful. Roll back the commit introducing the bug as a step towards dropping the log message. Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/ Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index 7aca29e1d30b..fd164e8a35b2 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -197,8 +197,6 @@ static int __init acpi_fadt_sanity_check(void) */ void __init acpi_boot_table_init(void) { - int ret; - /* * Enable ACPI instead of device tree unless * - ACPI has been disabled explicitly (acpi=off), or @@ -252,12 +250,10 @@ void __init acpi_boot_table_init(void) * behaviour, use acpi=nospcr to disable console in ACPI SPCR * table as default serial console. */ - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, + acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) - pr_info("Use ACPI SPCR as default console: No\n"); - else - pr_info("Use ACPI SPCR as default console: Yes\n"); + pr_info("Use ACPI SPCR as default console: %s\n", + param_acpi_nospcr ? "No" : "Yes"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); From 7991fda619f7251994ab364f03f3e6fc0aa143d9 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 31 Oct 2025 11:11:38 +0000 Subject: [PATCH 0813/1075] arm64: acpi: Drop message logging SPCR default console Commit f5a4af3c7527 ("ACPI: Add acpi=nospcr to disable ACPI SPCR as default console on ARM64") introduced a command line parameter to prevent using SPCR provided console as default. It also introduced a message to log this choice. Drop the message as it is not particularly useful and can be incorrect in situations where no SPCR is provided by the firmware. Link: https://lore.kernel.org/all/aQN0YWUYaPYWpgJM@willie-the-truck/ Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index fd164e8a35b2..c022c1acb8c7 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -252,8 +252,6 @@ void __init acpi_boot_table_init(void) */ acpi_parse_spcr(earlycon_acpi_spcr_enable, !param_acpi_nospcr); - pr_info("Use ACPI SPCR as default console: %s\n", - param_acpi_nospcr ? "No" : "Yes"); if (IS_ENABLED(CONFIG_ACPI_BGRT)) acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); From 0ec364c0c95fc85bcbc88f1a9a06ebe83c88e18c Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 4 Nov 2025 13:49:47 -0800 Subject: [PATCH 0814/1075] arm64: kprobes: check the return value of set_memory_rox() Since commit a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full"), __change_memory_common has more chance to fail due to memory allocation failure when splitting page table. So check the return value of set_memory_rox(), then bail out if it fails otherwise we may have RW memory mapping for kprobes insn page. Fixes: 195a1b7d8388 ("arm64: kprobes: call set_memory_rox() for kprobe page") Reviewed-by: Ryan Roberts Reviewed-by: Dev Jain Signed-off-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 8ab6104a4883..43a0361a8bf0 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -49,7 +49,10 @@ void *alloc_insn_page(void) addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); if (!addr) return NULL; - set_memory_rox((unsigned long)addr, 1); + if (set_memory_rox((unsigned long)addr, 1)) { + execmem_free(addr); + return NULL; + } return addr; } From ce2b3a50ad922abbba36425343a1bcec46903a26 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:41 +0000 Subject: [PATCH 0815/1075] arm64: mm: Don't sleep in split_kernel_leaf_mapping() when in atomic context It has been reported that split_kernel_leaf_mapping() is trying to sleep in non-sleepable context. It does this when acquiring the pgtable_split_lock mutex, when either CONFIG_DEBUG_PAGEALLOC or CONFIG_KFENCE are enabled, which change linear map permissions within softirq context during memory allocation and/or freeing. All other paths into this function are called from sleepable context and so are safe. But it turns out that the memory for which these 2 features may attempt to modify the permissions is always mapped by pte, so there is no need to attempt to split the mapping. So let's exit early in these cases and avoid attempting to take the mutex. There is one wrinkle to this approach; late-initialized kfence allocates it's pool from the buddy which may be block mapped. So we must hook that allocation and convert it to pte-mappings up front. Previously this was done as a side-effect of kfence protecting all the individual pages in its pool at init-time, but this no longer works due to the added early exit path in split_kernel_leaf_mapping(). So instead, do this via the existing arch_kfence_init_pool() arch hook, and reuse the existing linear_map_split_to_ptes() infrastructure. Closes: https://lore.kernel.org/all/f24b9032-0ec9-47b1-8b95-c0eeac7a31c5@roeck-us.net/ Fixes: a166563e7ec3 ("arm64: mm: support large block mapping when rodata=full") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand (Red Hat) Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/include/asm/kfence.h | 3 +- arch/arm64/mm/mmu.c | 92 +++++++++++++++++++++++---------- 2 files changed, 67 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h index a81937fae9f6..21dbc9dda747 100644 --- a/arch/arm64/include/asm/kfence.h +++ b/arch/arm64/include/asm/kfence.h @@ -10,8 +10,6 @@ #include -static inline bool arch_kfence_init_pool(void) { return true; } - static inline bool kfence_protect_page(unsigned long addr, bool protect) { set_memory_valid(addr, 1, !protect); @@ -25,6 +23,7 @@ static inline bool arm64_kfence_can_set_direct_map(void) { return !kfence_early_init; } +bool arch_kfence_init_pool(void); #else /* CONFIG_KFENCE */ static inline bool arm64_kfence_can_set_direct_map(void) { return false; } #endif /* CONFIG_KFENCE */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index b8d37eb037fc..a364ac2c9c61 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -708,6 +708,16 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr) return ret; } +static inline bool force_pte_mapping(void) +{ + bool bbml2 = system_capabilities_finalized() ? + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); + + return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || + is_realm_world())) || + debug_pagealloc_enabled(); +} + static DEFINE_MUTEX(pgtable_split_lock); int split_kernel_leaf_mapping(unsigned long start, unsigned long end) @@ -723,6 +733,16 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) if (!system_supports_bbml2_noabort()) return 0; + /* + * If the region is within a pte-mapped area, there is no need to try to + * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may + * change permissions from atomic context so for those cases (which are + * always pte-mapped), we must not go any further because taking the + * mutex below may sleep. + */ + if (force_pte_mapping() || is_kfence_address((void *)start)) + return 0; + /* * Ensure start and end are at least page-aligned since this is the * finest granularity we can split to. @@ -758,30 +778,30 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) return ret; } -static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pud_t pud = pudp_get(pudp); int ret = 0; if (pud_leaf(pud)) - ret = split_pud(pudp, pud, GFP_ATOMIC, false); + ret = split_pud(pudp, pud, gfp, false); return ret; } -static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, + unsigned long next, struct mm_walk *walk) { + gfp_t gfp = *(gfp_t *)walk->private; pmd_t pmd = pmdp_get(pmdp); int ret = 0; if (pmd_leaf(pmd)) { if (pmd_cont(pmd)) split_contpmd(pmdp); - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); + ret = split_pmd(pmdp, pmd, gfp, false); /* * We have split the pmd directly to ptes so there is no need to @@ -793,9 +813,8 @@ static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, return ret; } -static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, - unsigned long next, - struct mm_walk *walk) +static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, + unsigned long next, struct mm_walk *walk) { pte_t pte = __ptep_get(ptep); @@ -805,12 +824,18 @@ static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, return 0; } -static const struct mm_walk_ops split_to_ptes_ops __initconst = { +static const struct mm_walk_ops split_to_ptes_ops = { .pud_entry = split_to_ptes_pud_entry, .pmd_entry = split_to_ptes_pmd_entry, .pte_entry = split_to_ptes_pte_entry, }; +static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) +{ + return walk_kernel_page_table_range_lockless(start, end, + &split_to_ptes_ops, NULL, &gfp); +} + static bool linear_map_requires_bbml2 __initdata; u32 idmap_kpti_bbml2_flag; @@ -847,11 +872,9 @@ static int __init linear_map_split_to_ptes(void *__unused) * PTE. The kernel alias remains static throughout runtime so * can continue to be safely mapped with large mappings. */ - ret = walk_kernel_page_table_range_lockless(lstart, kstart, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); if (!ret) - ret = walk_kernel_page_table_range_lockless(kend, lend, - &split_to_ptes_ops, NULL, NULL); + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); if (ret) panic("Failed to split linear map\n"); flush_tlb_kernel_range(lstart, lend); @@ -1002,6 +1025,33 @@ static void __init arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); __kfence_pool = phys_to_virt(kfence_pool); } + +bool arch_kfence_init_pool(void) +{ + unsigned long start = (unsigned long)__kfence_pool; + unsigned long end = start + KFENCE_POOL_SIZE; + int ret; + + /* Exit early if we know the linear map is already pte-mapped. */ + if (!system_supports_bbml2_noabort() || force_pte_mapping()) + return true; + + /* Kfence pool is already pte-mapped for the early init case. */ + if (kfence_early_init) + return true; + + mutex_lock(&pgtable_split_lock); + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); + mutex_unlock(&pgtable_split_lock); + + /* + * Since the system supports bbml2_noabort, tlb invalidation is not + * required here; the pgtable mappings have been split to pte but larger + * entries may safely linger in the TLB. + */ + + return !ret; +} #else /* CONFIG_KFENCE */ static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } @@ -1009,16 +1059,6 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { #endif /* CONFIG_KFENCE */ -static inline bool force_pte_mapping(void) -{ - bool bbml2 = system_capabilities_finalized() ? - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); -} - static void __init map_mem(pgd_t *pgdp) { static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN); From 40a292f701474f7c21b27911677485efa233e94e Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:42 +0000 Subject: [PATCH 0816/1075] arm64: mm: Optimize range_split_to_ptes() Enter lazy_mmu mode while splitting a range of memory to pte mappings. This causes barriers, which would otherwise be emitted after every pte (and pmd/pud) write, to be deferred until exiting lazy_mmu mode. For large systems, this is expected to significantly speed up fallback to pte-mapping the linear map for the case where the boot CPU has BBML2_NOABORT, but secondary CPUs do not. I haven't directly measured it, but this is equivalent to commit 1fcb7cea8a5f ("arm64: mm: Batch dsb and isb when populating pgtables"). Note that for the path from arch_kfence_init_pool(), we may sleep while allocating memory inside the lazy_mmu mode. Sleeping is not allowed by generic code inside lazy_mmu, but we know that the arm64 implementation is sleep-safe. So this is ok and follows the same pattern already used by split_kernel_leaf_mapping(). Signed-off-by: Ryan Roberts Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a364ac2c9c61..652bb8c14035 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -832,8 +832,14 @@ static const struct mm_walk_ops split_to_ptes_ops = { static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) { - return walk_kernel_page_table_range_lockless(start, end, + int ret; + + arch_enter_lazy_mmu_mode(); + ret = walk_kernel_page_table_range_lockless(start, end, &split_to_ptes_ops, NULL, &gfp); + arch_leave_lazy_mmu_mode(); + + return ret; } static bool linear_map_requires_bbml2 __initdata; From 53357f14f924a06cced46069755bb10c2a6891c1 Mon Sep 17 00:00:00 2001 From: Ryan Roberts Date: Thu, 6 Nov 2025 16:09:43 +0000 Subject: [PATCH 0817/1075] arm64: mm: Tidy up force_pte_mapping() Tidy up the implementation of force_pte_mapping() to make it easier to read and introduce the split_leaf_mapping_possible() helper to reduce code duplication in split_kernel_leaf_mapping() and arch_kfence_init_pool(). Suggested-by: David Hildenbrand (Red Hat) Signed-off-by: Ryan Roberts Reviewed-by: David Hildenbrand (Red Hat) Reviewed-by: Yang Shi Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 652bb8c14035..2ba01dc8ef82 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -710,12 +710,26 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr) static inline bool force_pte_mapping(void) { - bool bbml2 = system_capabilities_finalized() ? + const bool bbml2 = system_capabilities_finalized() ? system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || - is_realm_world())) || - debug_pagealloc_enabled(); + if (debug_pagealloc_enabled()) + return true; + if (bbml2) + return false; + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); +} + +static inline bool split_leaf_mapping_possible(void) +{ + /* + * !BBML2_NOABORT systems should never run into scenarios where we would + * have to split. So exit early and let calling code detect it and raise + * a warning. + */ + if (!system_supports_bbml2_noabort()) + return false; + return !force_pte_mapping(); } static DEFINE_MUTEX(pgtable_split_lock); @@ -725,22 +739,11 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end) int ret; /* - * !BBML2_NOABORT systems should not be trying to change permissions on - * anything that is not pte-mapped in the first place. Just return early - * and let the permission change code raise a warning if not already - * pte-mapped. + * Exit early if the region is within a pte-mapped area or if we can't + * split. For the latter case, the permission change code will raise a + * warning if not already pte-mapped. */ - if (!system_supports_bbml2_noabort()) - return 0; - - /* - * If the region is within a pte-mapped area, there is no need to try to - * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may - * change permissions from atomic context so for those cases (which are - * always pte-mapped), we must not go any further because taking the - * mutex below may sleep. - */ - if (force_pte_mapping() || is_kfence_address((void *)start)) + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) return 0; /* @@ -1039,7 +1042,7 @@ bool arch_kfence_init_pool(void) int ret; /* Exit early if we know the linear map is already pte-mapped. */ - if (!system_supports_bbml2_noabort() || force_pte_mapping()) + if (!split_leaf_mapping_possible()) return true; /* Kfence pool is already pte-mapped for the early init case. */ From 62e72463ca714073962eda450e80c5d71dfb0dcb Mon Sep 17 00:00:00 2001 From: shechenglong Date: Fri, 31 Oct 2025 17:15:05 +0800 Subject: [PATCH 0818/1075] arm64: proton-pack: Drop print when !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY Following the pattern established with other Spectre mitigations, do not print a message when the CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY Kconfig option is disabled. Suggested-by: Will Deacon Signed-off-by: shechenglong Signed-off-by: Will Deacon --- arch/arm64/kernel/proton-pack.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index f9a32dfde006..d833b7c1bba8 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1042,8 +1042,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ - } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); } else if (cpu_mitigations_off() || __nospectre_bhb) { pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { From 7f1635737823a6c0c412ccf3767a12bec642c10f Mon Sep 17 00:00:00 2001 From: shechenglong Date: Fri, 31 Oct 2025 17:15:06 +0800 Subject: [PATCH 0819/1075] arm64: proton-pack: Fix hard lockup due to print in scheduler context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate the printk() calls from spectre_v4_mitigations_off() and spectre_v2_mitigations_off() into setup_system_capabilities() function, preventing hard lockups caused by printk calls in scheduler context: | _raw_spin_lock_nested+168 | ttwu_queue+180 (rq_lock(rq, &rf); 2nd acquiring the rq->__lock) | try_to_wake_up+548 | wake_up_process+32 | __up+88 | up+100 | __up_console_sem+96 | console_unlock+696 | vprintk_emit+428 | vprintk_default+64 | vprintk_func+220 | printk+104 | spectre_v4_enable_task_mitigation+344 | __switch_to+100 | __schedule+1028 (rq_lock(rq, &rf); 1st acquiring the rq->__lock) | schedule_idle+48 | do_idle+388 | cpu_startup_entry+44 | secondary_start_kernel+352 Suggested-by: Mark Rutland Suggested-by: Catalin Marinas Suggested-by: Will Deacon Signed-off-by: shechenglong Signed-off-by: Will Deacon --- arch/arm64/include/asm/spectre.h | 1 + arch/arm64/kernel/cpufeature.c | 6 ++++++ arch/arm64/kernel/proton-pack.c | 33 +++++++++++++++++--------------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 8fef12626090..900454aaa292 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -117,6 +117,7 @@ void spectre_bhb_patch_wa3(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void spectre_bhb_patch_clearbhb(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void spectre_print_disabled_mitigations(void); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ed401ff79e3..e25b0f84a22d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -95,6 +95,7 @@ #include #include +#include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; @@ -3875,6 +3876,11 @@ static void __init setup_system_capabilities(void) */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); + + /* + * Report Spectre mitigations status. + */ + spectre_print_disabled_mitigations(); } void __init setup_system_features(void) diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index d833b7c1bba8..c7d70d04c164 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -91,12 +91,7 @@ early_param("nospectre_v2", parse_spectre_v2_param); static bool spectre_v2_mitigations_off(void) { - bool ret = __nospectre_v2 || cpu_mitigations_off(); - - if (ret) - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); - - return ret; + return __nospectre_v2 || cpu_mitigations_off(); } static const char *get_bhb_affected_string(enum mitigation_state bhb_state) @@ -421,13 +416,8 @@ early_param("ssbd", parse_spectre_v4_param); */ static bool spectre_v4_mitigations_off(void) { - bool ret = cpu_mitigations_off() || - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; - - if (ret) - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); - - return ret; + return cpu_mitigations_off() || + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; } /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ @@ -1042,8 +1032,6 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { /* No point mitigating Spectre-BHB alone. */ - } else if (cpu_mitigations_off() || __nospectre_bhb) { - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; set_bit(BHB_HW, &system_bhb_mitigations); @@ -1197,3 +1185,18 @@ void unpriv_ebpf_notify(int new_state) pr_err("WARNING: %s", EBPF_WARN); } #endif + +void spectre_print_disabled_mitigations(void) +{ + /* Keep a single copy of the common message suffix to avoid duplication. */ + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; + + if (spectre_v2_mitigations_off()) + pr_info("spectre-v2 %s", spectre_disabled_suffix); + + if (spectre_v4_mitigations_off()) + pr_info("spectre-v4 %s", spectre_disabled_suffix); + + if (__nospectre_bhb || cpu_mitigations_off()) + pr_info("spectre-bhb %s", spectre_disabled_suffix); +} From 6d4a0fbd34a40c9f877b136de874dc3498031309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Barna=C5=9B?= Date: Mon, 22 Sep 2025 13:04:26 +0000 Subject: [PATCH 0820/1075] arm64: Fail module loading if dynamic SCS patching fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disallow a module to load if SCS dynamic patching fails for its code. For module loading, instead of running a dry-run to check for patching errors, try to run patching in the first run and propagate any errors so module loading will fail. Signed-off-by: Adrian Barnaś Reviewed-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/scs.h | 2 +- arch/arm64/kernel/module.c | 12 ++++++++++-- arch/arm64/kernel/pi/map_kernel.c | 2 +- arch/arm64/kernel/pi/patch-scs.c | 10 ++++++---- arch/arm64/kernel/pi/pi.h | 2 +- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index a76f9b387a26..c59f6324f2bb 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -53,7 +53,7 @@ enum { EDYNSCS_INVALID_CFA_OPCODE = 4, }; -int __pi_scs_patch(const u8 eh_frame[], int size); +int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); #endif /* __ASSEMBLY __ */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d6d443c4a01a..01acbff8a1ae 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -495,10 +495,18 @@ int module_finalize(const Elf_Ehdr *hdr, if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); if (s) { - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); - if (ret) + /* + * Because we can reject modules that are malformed + * so SCS patching fails, skip dry run and try to patch + * it in place. If patching fails, the module would not + * be loaded anyway. + */ + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); + if (ret) { pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", me->name, ret); + return -ENOEXEC; + } } } diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c index e8ddbde31a83..659297f87cfa 100644 --- a/arch/arm64/kernel/pi/map_kernel.c +++ b/arch/arm64/kernel/pi/map_kernel.c @@ -104,7 +104,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level) if (enable_scs) { scs_patch(__eh_frame_start + va_offset, - __eh_frame_end - __eh_frame_start); + __eh_frame_end - __eh_frame_start, false); asm("ic ialluis"); dynamic_scs_is_enabled = true; diff --git a/arch/arm64/kernel/pi/patch-scs.c b/arch/arm64/kernel/pi/patch-scs.c index 55d0cd64ef71..bbe7d30ed12b 100644 --- a/arch/arm64/kernel/pi/patch-scs.c +++ b/arch/arm64/kernel/pi/patch-scs.c @@ -225,7 +225,7 @@ static int scs_handle_fde_frame(const struct eh_frame *frame, return 0; } -int scs_patch(const u8 eh_frame[], int size) +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) { int code_alignment_factor = 1; bool fde_use_sdata8 = false; @@ -277,11 +277,13 @@ int scs_patch(const u8 eh_frame[], int size) } } else { ret = scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, true); + fde_use_sdata8, !skip_dry_run); if (ret) return ret; - scs_handle_fde_frame(frame, code_alignment_factor, - fde_use_sdata8, false); + + if (!skip_dry_run) + scs_handle_fde_frame(frame, code_alignment_factor, + fde_use_sdata8, false); } p += sizeof(frame->size) + frame->size; diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h index 08ef9f80456b..aec3172d4003 100644 --- a/arch/arm64/kernel/pi/pi.h +++ b/arch/arm64/kernel/pi/pi.h @@ -27,7 +27,7 @@ extern pgd_t init_pg_dir[], init_pg_end[]; void init_feature_override(u64 boot_status, const void *fdt, int chosen); u64 kaslr_early_init(void *fdt, int chosen); void relocate_kernel(u64 offset); -int scs_patch(const u8 eh_frame[], int size); +int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, pgprot_t prot, int level, pte_t *tbl, bool may_use_cont, From 8e8ae788964aa2573b4335026db4068540fa6a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Barna=C5=9B?= Date: Mon, 22 Sep 2025 13:04:27 +0000 Subject: [PATCH 0821/1075] arm64: Reject modules with internal alternative callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During module loading, check if a callback function used by the alternatives specified in the '.altinstruction' ELF section (if present) is located in core kernel .text. If not fail module loading before callback is called. Reported-by: Fanqin Cui Closes: https://lore.kernel.org/all/20250807072700.348514-1-fanqincui@163.com/ Signed-off-by: Adrian Barnaś Reviewed-by: Ard Biesheuvel [will: Folded in 'noinstr' tweak from Mark] Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 7 +++++-- arch/arm64/kernel/alternative.c | 19 ++++++++++++------- arch/arm64/kernel/module.c | 9 +++++++-- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 00d97b8a757f..51746005239b 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -26,9 +26,12 @@ void __init apply_alternatives_all(void); bool alternative_is_applied(u16 cpucap); #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length); +int apply_alternatives_module(void *start, size_t length); #else -static inline void apply_alternatives_module(void *start, size_t length) { } +static inline int apply_alternatives_module(void *start, size_t length) +{ + return 0; +} #endif void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr, diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8ff6610af496..f5ec7e7c1d3f 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -139,9 +139,9 @@ static noinstr void clean_dcache_range_nopatch(u64 start, u64 end) } while (cur += d_size, cur < end); } -static void __apply_alternatives(const struct alt_region *region, - bool is_module, - unsigned long *cpucap_mask) +static int __apply_alternatives(const struct alt_region *region, + bool is_module, + unsigned long *cpucap_mask) { struct alt_instr *alt; __le32 *origptr, *updptr; @@ -166,10 +166,13 @@ static void __apply_alternatives(const struct alt_region *region, updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; - if (ALT_HAS_CB(alt)) + if (ALT_HAS_CB(alt)) { alt_cb = ALT_REPL_PTR(alt); - else + if (is_module && !core_kernel_text((unsigned long)alt_cb)) + return -ENOEXEC; + } else { alt_cb = patch_alternative; + } alt_cb(alt, origptr, updptr, nr_inst); @@ -193,6 +196,8 @@ static void __apply_alternatives(const struct alt_region *region, bitmap_and(applied_alternatives, applied_alternatives, system_cpucaps, ARM64_NCAPS); } + + return 0; } static void __init apply_alternatives_vdso(void) @@ -277,7 +282,7 @@ void __init apply_boot_alternatives(void) } #ifdef CONFIG_MODULES -void apply_alternatives_module(void *start, size_t length) +int apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, @@ -287,7 +292,7 @@ void apply_alternatives_module(void *start, size_t length) bitmap_fill(all_capabilities, ARM64_NCAPS); - __apply_alternatives(®ion, true, &all_capabilities[0]); + return __apply_alternatives(®ion, true, &all_capabilities[0]); } #endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 01acbff8a1ae..24adb581af0e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -489,8 +489,13 @@ int module_finalize(const Elf_Ehdr *hdr, int ret; s = find_section(hdr, sechdrs, ".altinstructions"); - if (s) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (s) { + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (ret < 0) { + pr_err("module %s: error occurred when applying alternatives\n", me->name); + return ret; + } + } if (scs_is_dynamic()) { s = find_section(hdr, sechdrs, ".init.eh_frame"); From 62b9ca1706e1bbb60d945a58de7c7b5826f6b2a2 Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 5 Nov 2025 22:51:05 -0600 Subject: [PATCH 0822/1075] PM: hibernate: Emit an error when image writing fails If image writing fails, a return code is passed up to the caller, but none of the callers log anything to the log and so the only record of it is the return code that userspace gets. Adjust the logging so that the image size and speed of writing is only emitted on success and if there is an error, it's saved to the logs. Fixes: a06c6f5d3cc9 ("PM: hibernate: Move to crypto APIs for LZO compression") Reported-by: Askar Safin Closes: https://lore.kernel.org/linux-pm/20251105180506.137448-1-safinaskar@gmail.com/ Signed-off-by: Mario Limonciello (AMD) Tested-by: Askar Safin Cc: 6.9+ # 6.9+ [ rjw: Added missing braces after "else", changelog edits ] Link: https://patch.msgid.link/20251106045158.3198061-2-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 0beff7eeaaba..7daa716d2cb1 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -877,11 +877,14 @@ static int save_compressed_image(struct swap_map_handle *handle, stop = ktime_get(); if (!ret) ret = err2; - if (!ret) + if (!ret) { + swsusp_show_speed(start, stop, nr_to_write, "Wrote"); + pr_info("Image size after compression: %d kbytes\n", + (atomic_read(&compressed_size) / 1024)); pr_info("Image saving done\n"); - swsusp_show_speed(start, stop, nr_to_write, "Wrote"); - pr_info("Image size after compression: %d kbytes\n", - (atomic_read(&compressed_size) / 1024)); + } else { + pr_err("Image saving failed: %d\n", ret); + } out_clean: hib_finish_batch(&hb); From 66ededc694f1d06a71ca35a3c8e3689e9b85b3ce Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 5 Nov 2025 22:51:06 -0600 Subject: [PATCH 0823/1075] PM: hibernate: Use atomic64_t for compressed_size variable `compressed_size` can overflow, showing nonsensical values. Change from `atomic_t` to `atomic64_t` to prevent overflow. Fixes: a06c6f5d3cc9 ("PM: hibernate: Move to crypto APIs for LZO compression") Reported-by: Askar Safin Closes: https://lore.kernel.org/linux-pm/20251105180506.137448-1-safinaskar@gmail.com/ Signed-off-by: Mario Limonciello (AMD) Tested-by: Askar Safin Cc: 6.9+ # 6.9+ Link: https://patch.msgid.link/20251106045158.3198061-3-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 7daa716d2cb1..e0441483dbee 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -635,7 +635,7 @@ struct cmp_data { }; /* Indicates the image size after compression */ -static atomic_t compressed_size = ATOMIC_INIT(0); +static atomic64_t compressed_size = ATOMIC_INIT(0); /* * Compression function that runs in its own thread. @@ -664,7 +664,7 @@ static int compress_threadfn(void *data) d->ret = crypto_acomp_compress(d->cr); d->cmp_len = d->cr->dlen; - atomic_set(&compressed_size, atomic_read(&compressed_size) + d->cmp_len); + atomic64_add(d->cmp_len, &compressed_size); atomic_set_release(&d->stop, 1); wake_up(&d->done); } @@ -696,7 +696,7 @@ static int save_compressed_image(struct swap_map_handle *handle, hib_init_batch(&hb); - atomic_set(&compressed_size, 0); + atomic64_set(&compressed_size, 0); /* * We'll limit the number of threads for compression to limit memory @@ -879,8 +879,8 @@ static int save_compressed_image(struct swap_map_handle *handle, ret = err2; if (!ret) { swsusp_show_speed(start, stop, nr_to_write, "Wrote"); - pr_info("Image size after compression: %d kbytes\n", - (atomic_read(&compressed_size) / 1024)); + pr_info("Image size after compression: %lld kbytes\n", + (atomic64_read(&compressed_size) / 1024)); pr_info("Image saving done\n"); } else { pr_err("Image saving failed: %d\n", ret); From 0b6c10cb8479d0d1b7b208277df2e2afe082d4bd Mon Sep 17 00:00:00 2001 From: "Mario Limonciello (AMD)" Date: Wed, 5 Nov 2025 22:51:07 -0600 Subject: [PATCH 0824/1075] PM: hibernate: Fix style issues in save_compressed_image() Address two issues indicated by checkpatch: - Trailing statements should be on next line. - Prefer 'unsigned int' to bare use of 'unsigned'. Signed-off-by: Mario Limonciello (AMD) [ rjw: Changelog edits ] Link: https://patch.msgid.link/20251106045158.3198061-4-superm1@kernel.org Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e0441483dbee..70ae21f7370d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -689,7 +689,7 @@ static int save_compressed_image(struct swap_map_handle *handle, ktime_t start; ktime_t stop; size_t off; - unsigned thr, run_threads, nr_threads; + unsigned int thr, run_threads, nr_threads; unsigned char *page = NULL; struct cmp_data *data = NULL; struct crc_data *crc = NULL; @@ -902,7 +902,8 @@ static int save_compressed_image(struct swap_map_handle *handle, } vfree(data); } - if (page) free_page((unsigned long)page); + if (page) + free_page((unsigned long)page); return ret; } From 4012abe8a78fbb8869634130024266eaef7081fe Mon Sep 17 00:00:00 2001 From: Joshua Rogers Date: Fri, 7 Nov 2025 00:09:37 +0800 Subject: [PATCH 0825/1075] smb: client: validate change notify buffer before copy SMB2_change_notify called smb2_validate_iov() but ignored the return code, then kmemdup()ed using server provided OutputBufferOffset/Length. Check the return of smb2_validate_iov() and bail out on error. Discovered with help from the ZeroPath security tooling. Signed-off-by: Joshua Rogers Reviewed-by: Paulo Alcantara (Red Hat) Cc: stable@vger.kernel.org Fixes: e3e9463414f61 ("smb3: improve SMB3 change notification support") Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index b0739a2661bf..8b4a4573e9c3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -4054,9 +4054,12 @@ SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon, smb_rsp = (struct smb2_change_notify_rsp *)rsp_iov.iov_base; - smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), - le32_to_cpu(smb_rsp->OutputBufferLength), &rsp_iov, + rc = smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), + le32_to_cpu(smb_rsp->OutputBufferLength), + &rsp_iov, sizeof(struct file_notify_information)); + if (rc) + goto cnotify_exit; *out_data = kmemdup((char *)smb_rsp + le16_to_cpu(smb_rsp->OutputBufferOffset), le32_to_cpu(smb_rsp->OutputBufferLength), GFP_KERNEL); From b6cfddd26ec55e865b4715f73e9bbb17a15091ed Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 31 Oct 2025 10:32:24 -0700 Subject: [PATCH 0826/1075] cxl: Adjust offset calculation for poison injection The HPA to DPA translation for poison injection assumes that the base address starts from where the CXL region begins. When the extended linear cache is active, the offset can be within the DRAM region. Adjust the offset so that it correctly reflects the offset within the CXL region. [ dj: Add fixes tag from Alison ] Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset") Link: https://patch.msgid.link/20251031173224.3537030-5-dave.jiang@intel.com Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang --- drivers/cxl/core/region.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index b06fee1978ba..41b64d871c5a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3702,6 +3702,7 @@ static int cxl_region_debugfs_poison_inject(void *data, u64 offset) if (validate_region_offset(cxlr, offset)) return -EINVAL; + offset -= cxlr->params.cache_size; rc = region_offset_to_dpa_result(cxlr, offset, &result); if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { dev_dbg(&cxlr->dev, @@ -3734,6 +3735,7 @@ static int cxl_region_debugfs_poison_clear(void *data, u64 offset) if (validate_region_offset(cxlr, offset)) return -EINVAL; + offset -= cxlr->params.cache_size; rc = region_offset_to_dpa_result(cxlr, offset, &result); if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { dev_dbg(&cxlr->dev, From 4fe5934db4a7187d358f1af1b3ef9b6dd59bce58 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Nov 2025 13:11:41 +0530 Subject: [PATCH 0827/1075] ACPI: CPPC: Detect preferred core availability on online CPUs Commit 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") introduced the ability to detect the preferred core on AMD platforms by checking if there at least two distinct highest_perf values. However, it uses for_each_present_cpu() to iterate through all the CPUs in the platform, which is problematic when the kernel is booted with "nosmt=force" commandline option. Hence limit the search to only the online CPUs. Fixes: 279f838a61f9 ("x86/amd: Detect preferred cores in amd_get_boost_ratio_numerator()") Reported-by: Christopher Harris Closes: https://lore.kernel.org/lkml/CAM+eXpdDT7KjLV0AxEwOLkSJ2QtrsvGvjA2cCHvt1d0k2_C4Cw@mail.gmail.com/ Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" Tested-by: Chrisopher Harris Signed-off-by: Gautham R. Shenoy Link: https://patch.msgid.link/20251107074145.2340-2-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cppc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 7047124490f6..d7c8ef1e354d 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -196,7 +196,7 @@ int amd_detect_prefcore(bool *detected) break; } - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { u32 tmp; int ret; From 6dd3b8a709a130a4d55c866af9804c81b8486d28 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Nov 2025 13:11:42 +0530 Subject: [PATCH 0828/1075] ACPI: CPPC: Check _CPC validity for only the online CPUs per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online CPUs via acpi_soft_cpu_online() --> __acpi_processor_start() --> acpi_cppc_processor_probe(). However the function acpi_cpc_valid() checks for the validity of the _CPC object for all the present CPUs. This breaks when the kernel is booted with "nosmt=force". Hence check the validity of the _CPC objects of only the online CPUs. Fixes: 2aeca6bd0277 ("ACPI: CPPC: Check present CPUs for determining _CPC is valid") Reported-by: Christopher Harris Closes: https://lore.kernel.org/lkml/CAM+eXpdDT7KjLV0AxEwOLkSJ2QtrsvGvjA2cCHvt1d0k2_C4Cw@mail.gmail.com/ Suggested-by: Mario Limonciello Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" Tested-by: Chrisopher Harris Signed-off-by: Gautham R. Shenoy Link: https://patch.msgid.link/20251107074145.2340-3-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 6c684e54fe01..da6b35ac8c87 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -460,7 +460,7 @@ bool acpi_cpc_valid(void) if (acpi_disabled) return false; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); if (!cpc_ptr) return false; From 8821c8e80a65bc4eb73daf63b34aac6b8ad69461 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Nov 2025 13:11:43 +0530 Subject: [PATCH 0829/1075] ACPI: CPPC: Perform fast check switch only for online CPUs per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online CPUs via acpi_soft_cpu_online() --> __acpi_processor_start() --> acpi_cppc_processor_probe(). However the function cppc_allow_fast_switch() checks for the validity of the _CPC object for all the present CPUs. This breaks when the kernel is booted with "nosmt=force". Check fast_switch capability only on online CPUs Fixes: 15eece6c5b05 ("ACPI: CPPC: Fix NULL pointer dereference when nosmp is used") Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" Signed-off-by: Gautham R. Shenoy Link: https://patch.msgid.link/20251107074145.2340-4-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index da6b35ac8c87..7492c9922866 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -476,7 +476,7 @@ bool cppc_allow_fast_switch(void) struct cpc_desc *cpc_ptr; int cpu; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { cpc_ptr = per_cpu(cpc_desc_ptr, cpu); desired_reg = &cpc_ptr->cpc_regs[DESIRED_PERF]; if (!CPC_IN_SYSTEM_MEMORY(desired_reg) && From 0fce75870666b46b700cfbd3216380b422f975da Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 7 Nov 2025 13:11:44 +0530 Subject: [PATCH 0830/1075] ACPI: CPPC: Limit perf ctrs in PCC check only to online CPUs per_cpu(cpc_desc_ptr, cpu) object is initialized for only the online CPU via acpi_soft_cpu_online() --> __acpi_processor_start() --> acpi_cppc_processor_probe(). However the function cppc_perf_ctrs_in_pcc() checks if the CPPC perf-ctrs are in a PCC region for all the present CPUs, which breaks when the kernel is booted with "nosmt=force". Hence, limit the check only to the online CPUs. Fixes: ae2df912d1a5 ("ACPI: CPPC: Disable FIE if registers in PCC regions") Reviewed-by: "Mario Limonciello (AMD) (kernel.org)" Signed-off-by: Gautham R. Shenoy Link: https://patch.msgid.link/20251107074145.2340-5-gautham.shenoy@amd.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 7492c9922866..3bdeeee3414e 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1435,7 +1435,7 @@ bool cppc_perf_ctrs_in_pcc(void) { int cpu; - for_each_present_cpu(cpu) { + for_each_online_cpu(cpu) { struct cpc_register_resource *ref_perf_reg; struct cpc_desc *cpc_desc; From 9818af18db4bfefd320d0fef41390a616365e6f7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Nov 2025 11:50:00 +0100 Subject: [PATCH 0831/1075] compiler_types: Move unused static inline functions warning to W=2 Per Nathan, clang catches unused "static inline" functions in C files since commit 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build"). Linus said: > So I entirely ignore W=1 issues, because I think so many of the extra > warnings are bogus. > > But if this one in particular is causing more problems than most - > some teams do seem to use W=1 as part of their test builds - it's fine > to send me a patch that just moves bad warnings to W=2. > > And if anybody uses W=2 for their test builds, that's THEIR problem.. Here is the change to bump the warning from W=1 to W=2. Fixes: 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build") Signed-off-by: Peter Zijlstra Signed-off-by: Andy Shevchenko Link: https://patch.msgid.link/20251106105000.2103276-1-andriy.shevchenko@linux.intel.com [nathan: Adjust comment as well] Signed-off-by: Nathan Chancellor --- include/linux/compiler_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 59288a2c1ad2..339603f05b54 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -250,10 +250,9 @@ struct ftrace_likely_data { /* * GCC does not warn about unused static inline functions for -Wunused-function. * Suppress the warning in clang as well by using __maybe_unused, but enable it - * for W=1 build. This will allow clang to find unused functions. Remove the - * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings. + * for W=2 build. This will allow clang to find unused functions. */ -#ifdef KBUILD_EXTRA_WARN1 +#ifdef KBUILD_EXTRA_WARN2 #define __inline_maybe_unused #else #define __inline_maybe_unused __maybe_unused From 2cf95b9baa52262bfb645cb3c04f902dd50c29e2 Mon Sep 17 00:00:00 2001 From: Shubhrajyoti Datta Date: Thu, 23 Oct 2025 17:01:08 +0530 Subject: [PATCH 0832/1075] EDAC/versalnet: Handle split messages for non-standard errors The current code assumes that only DDR errors have split messages. Ensure proper logging of non-standard event errors that may be split across multiple messages too. [ bp: Massage, move comment too, fix it up. ] Fixes: d5fe2fec6c40 ("EDAC: Add a driver for the AMD Versal NET DDR controller") Signed-off-by: Shubhrajyoti Datta Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20251023113108.3467132-1-shubhrajyoti.datta@amd.com --- drivers/edac/versalnet_edac.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/edac/versalnet_edac.c b/drivers/edac/versalnet_edac.c index 1ded4c3f0213..1a1092793092 100644 --- a/drivers/edac/versalnet_edac.c +++ b/drivers/edac/versalnet_edac.c @@ -605,21 +605,23 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, length = result[MSG_ERR_LENGTH]; offset = result[MSG_ERR_OFFSET]; + /* + * The data can come in two stretches. Construct the regs from two + * messages. The offset indicates the offset from which the data is to + * be taken. + */ + for (i = 0 ; i < length; i++) { + k = offset + i; + j = ERROR_DATA + i; + mc_priv->regs[k] = result[j]; + } + if (result[TOTAL_ERR_LENGTH] > length) { if (!mc_priv->part_len) mc_priv->part_len = length; else mc_priv->part_len += length; - /* - * The data can come in 2 stretches. Construct the regs from 2 - * messages the offset indicates the offset from which the data is to - * be taken - */ - for (i = 0 ; i < length; i++) { - k = offset + i; - j = ERROR_DATA + i; - mc_priv->regs[k] = result[j]; - } + if (mc_priv->part_len < result[TOTAL_ERR_LENGTH]) return 0; mc_priv->part_len = 0; @@ -705,7 +707,7 @@ static int rpmsg_cb(struct rpmsg_device *rpdev, void *data, /* Convert to bytes */ length = result[TOTAL_ERR_LENGTH] * 4; log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message, - sec_sev, (void *)&result[ERROR_DATA], length); + sec_sev, (void *)&mc_priv->regs, length); return 0; } From 4b93d211bbffd3dce76664d95f2306d23e7215ce Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Thu, 30 Oct 2025 08:02:28 +0530 Subject: [PATCH 0833/1075] ACPI: MRRM: Fix memory leaks and improve error handling Add proper error handling and resource cleanup to prevent memory leaks in add_boot_memory_ranges(). The function now checks for NULL return from kobject_create_and_add(), uses local buffer for range names to avoid dynamic allocation, and implements a cleanup path that removes previously created sysfs groups and kobjects on failure. This prevents resource leaks when kobject creation or sysfs group creation fails during boot memory range initialization. Signed-off-by: Kaushlendra Kumar Reviewed-by: Tony Luck Link: https://patch.msgid.link/20251030023228.3956296-1-kaushlendra.kumar@intel.com Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_mrrm.c | 51 +++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/acpi_mrrm.c b/drivers/acpi/acpi_mrrm.c index a6dbf623e557..6d69554c940e 100644 --- a/drivers/acpi/acpi_mrrm.c +++ b/drivers/acpi/acpi_mrrm.c @@ -152,26 +152,49 @@ ATTRIBUTE_GROUPS(memory_range); static __init int add_boot_memory_ranges(void) { - struct kobject *pkobj, *kobj; + struct kobject *pkobj, *kobj, **kobjs; int ret = -EINVAL; - char *name; + char name[16]; + int i; pkobj = kobject_create_and_add("memory_ranges", acpi_kobj); + if (!pkobj) + return -ENOMEM; - for (int i = 0; i < mrrm_mem_entry_num; i++) { - name = kasprintf(GFP_KERNEL, "range%d", i); - if (!name) { - ret = -ENOMEM; - break; - } - - kobj = kobject_create_and_add(name, pkobj); - - ret = sysfs_create_groups(kobj, memory_range_groups); - if (ret) - return ret; + kobjs = kcalloc(mrrm_mem_entry_num, sizeof(*kobjs), GFP_KERNEL); + if (!kobjs) { + kobject_put(pkobj); + return -ENOMEM; } + for (i = 0; i < mrrm_mem_entry_num; i++) { + scnprintf(name, sizeof(name), "range%d", i); + kobj = kobject_create_and_add(name, pkobj); + if (!kobj) { + ret = -ENOMEM; + goto cleanup; + } + + ret = sysfs_create_groups(kobj, memory_range_groups); + if (ret) { + kobject_put(kobj); + goto cleanup; + } + kobjs[i] = kobj; + } + + kfree(kobjs); + return 0; + +cleanup: + for (int j = 0; j < i; j++) { + if (kobjs[j]) { + sysfs_remove_groups(kobjs[j], memory_range_groups); + kobject_put(kobjs[j]); + } + } + kfree(kobjs); + kobject_put(pkobj); return ret; } From 146eb58629f45f8297e83d69e64d4eea4b28d972 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 7 Nov 2025 18:41:26 +0000 Subject: [PATCH 0834/1075] io_uring: fix regbuf vector size truncation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a report of io_estimate_bvec_size() truncating the calculated number of segments that leads to corruption issues. Check it doesn't overflow "int"s used later. Rough but simple, can be improved on top. Cc: stable@vger.kernel.org Fixes: 9ef4cbbcb4ac3 ("io_uring: add infra for importing vectored reg buffers") Reported-by: Google Big Sleep Signed-off-by: Pavel Begunkov Reviewed-by: Günther Noack Tested-by: Günther Noack Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index d787c16dc1c3..2602d76d5ff0 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1403,8 +1403,11 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs, size_t max_segs = 0; unsigned i; - for (i = 0; i < nr_iovs; i++) + for (i = 0; i < nr_iovs; i++) { max_segs += (iov[i].iov_len >> shift) + 2; + if (max_segs > INT_MAX) + return -EOVERFLOW; + } return max_segs; } @@ -1510,7 +1513,11 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter, if (unlikely(ret)) return ret; } else { - nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + int ret = io_estimate_bvec_size(iov, nr_iovs, imu); + + if (ret < 0) + return ret; + nr_segs = ret; } if (sizeof(struct bio_vec) > sizeof(struct iovec)) { From 3ad1b71fdc5707d14332d9ae710a237de936be9b Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 29 Oct 2025 17:44:28 +0800 Subject: [PATCH 0835/1075] riscv: Build loader.bin exclusively for Canaan K210 According to the explanation in commit ef10bdf9c3e6 ("riscv: Kconfig.socs: Split ARCH_CANAAN and SOC_CANAAN_K210"), loader.bin is a special feature of the Canaan K210 and is not applicable to other SoCs. Fixes: e79dfcbfb902 ("riscv: make image compression configurable") Signed-off-by: Feng Jiang Reviewed-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20251029094429.553842-1-jiangfeng@kylinos.cn Signed-off-by: Paul Walmsley --- arch/riscv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index ecf2fcce2d92..3998d4036f15 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -159,7 +159,7 @@ boot-image-$(CONFIG_KERNEL_LZO) := Image.lzo boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst boot-image-$(CONFIG_KERNEL_XZ) := Image.xz ifdef CONFIG_RISCV_M_MODE -boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin +boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin endif boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi boot-image-$(CONFIG_XIP_KERNEL) := xipImage From 5e8632987dd1882ed4d1e1039032ab1b0c1ec12b Mon Sep 17 00:00:00 2001 From: Feng Jiang Date: Wed, 29 Oct 2025 17:44:29 +0800 Subject: [PATCH 0836/1075] riscv: Remove redundant judgment for the default build target The value of KBUILD_IMAGE is derived from $(boot-image-y), so there's no need for redundant checks before this. Signed-off-by: Feng Jiang Reviewed-by: Emil Renner Berthing Link: https://lore.kernel.org/r/20251029094429.553842-2-jiangfeng@kylinos.cn Signed-off-by: Paul Walmsley --- arch/riscv/Makefile | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 3998d4036f15..4c6de57f65ef 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -134,21 +134,6 @@ endif CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make -boot := arch/riscv/boot -ifeq ($(CONFIG_XIP_KERNEL),y) -KBUILD_IMAGE := $(boot)/xipImage -else -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) -KBUILD_IMAGE := $(boot)/loader.bin -else -ifeq ($(CONFIG_EFI_ZBOOT),) -KBUILD_IMAGE := $(boot)/Image.gz -else -KBUILD_IMAGE := $(boot)/vmlinuz.efi -endif -endif -endif - boot := arch/riscv/boot boot-image-y := Image boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 From dc20452e6caf962f04ede7f364267b0c37784ab4 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 7 Nov 2025 14:56:59 -0700 Subject: [PATCH 0837/1075] riscv: Fix CONFIG_AS_HAS_INSN for new .insn usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 44aa25c000b4 ("riscv: asm: use .insn for making custom instructions"), builds using LLVM older that 19 or binutils older than 2.38 fail with: arch/riscv/include/asm/vdso/processor.h: Assembler messages: arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f' arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f' arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f' arch/riscv/include/asm/vdso/processor.h:27: Error: unrecognized opcode `0x100000f' make[4]: *** [scripts/Makefile.build:287: arch/riscv/kernel/vdso/vgettimeofday.o] Error 1 In file included from :4: In file included from lib/vdso/gettimeofday.c:6: In file included from include/vdso/datapage.h:21: In file included from include/vdso/processor.h:10: arch/riscv/include/asm/vdso/processor.h:23:2: error: expected instruction format 23 | ALT_RISCV_PAUSE(); | ^ arch/riscv/include/asm/errata_list.h:47:3: note: expanded from macro 'ALT_RISCV_PAUSE' 47 | RISCV_PAUSE, /* Original RISC‑V pause insn */ \ | ^ arch/riscv/include/asm/insn-def.h:259:21: note: expanded from macro 'RISCV_PAUSE' 259 | #define RISCV_PAUSE ASM_INSN_I("0x100000f") | ^ arch/riscv/include/asm/asm.h:16:26: note: expanded from macro 'ASM_INSN_I' 16 | #define ASM_INSN_I(__x) ".insn " __x | ^ :5:7: note: instantiated into assembly here 5 | .insn 0x100000f | ^ binutils gained support for '.insn ' in 2.38 [1] and LLVM gained support in 19 [2]. Adjust the test for CONFIG_AS_HAS_INSN to ensure that all versions of .insn are supported before being used. Fixes: 44aa25c000b4 ("riscv: asm: use .insn for making custom instructions") Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=a262b82fdbf4cda3b0648b1adc32245ca3f78b7a [1] Link: https://github.com/llvm/llvm-project/commit/2a086dce691e3cc34a2fc27f4fb255bb2cbbfac9 [2] Suggested-by: Andrew Jones Signed-off-by: Nathan Chancellor Reviewed-by: Andrew Jones Link: https://patch.msgid.link/20251107-riscv-fix-new-insn-usage-v1-1-9a186c5928a0@kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 22cda9c452d2..fadec20b87a8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -367,7 +367,7 @@ config RISCV_NONSTANDARD_CACHE_OPS systems to handle cache management. config AS_HAS_INSN - def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) + def_bool $(as-instr,.insn 0x100000f) config AS_HAS_OPTION_ARCH # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4 From 4da4e4bde1c453ac5cc2dce5def81d504ae257ee Mon Sep 17 00:00:00 2001 From: Nate Karstens Date: Thu, 6 Nov 2025 16:28:33 -0600 Subject: [PATCH 0838/1075] strparser: Fix signed/unsigned mismatch bug The `len` member of the sk_buff is an unsigned int. This is cast to `ssize_t` (a signed type) for the first sk_buff in the comparison, but not the second sk_buff. On 32-bit systems, this can result in an integer underflow for certain values because unsigned arithmetic is being used. This appears to be an oversight: if the intention was to use unsigned arithmetic, then the first cast would have been omitted. The change ensures both len values are cast to `ssize_t`. The underflow causes an issue with ktls when multiple TLS PDUs are included in a single TCP segment. The mainline kernel does not use strparser for ktls anymore, but this is still useful for other features that still use strparser, and for backporting. Signed-off-by: Nate Karstens Cc: stable@vger.kernel.org Fixes: 43a0c6751a32 ("strparser: Stream parser for messages") Reviewed-by: Jacob Keller Reviewed-by: Sabrina Dubroca Link: https://patch.msgid.link/20251106222835.1871628-1-nate.karstens@garmin.com Signed-off-by: Jakub Kicinski --- net/strparser/strparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 43b1f558b33d..e659fea2da70 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -238,7 +238,7 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - stm->strp.offset) { + (ssize_t)skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ From 57531b3416448d1ced36a2a974a4085ec43d57b0 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 6 Nov 2025 17:12:09 +0100 Subject: [PATCH 0839/1075] selftests: net: local_termination: Wait for interfaces to come up It seems that most of the tests prepare the interfaces once before the test run (setup_prepare()), rely on setup_wait() to wait for link and only then run the test(s). local_termination brings the physical interfaces down and up during test run but never wait for them to come up. If the auto-negotiation takes some seconds, first test packets are being lost, which leads to false-negative test results. Use setup_wait() in run_test() to make sure auto-negotiation has been completed after all simple_if_init() calls on physical interfaces and test packets will not be lost because of the race against link establishment. Fixes: 90b9566aa5cd3f ("selftests: forwarding: add a test for local_termination.sh") Reviewed-by: Vladimir Oltean Signed-off-by: Alexander Sverdlin Link: https://patch.msgid.link/20251106161213.459501-1-alexander.sverdlin@siemens.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/local_termination.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index ecd34f364125..892895659c7e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -176,6 +176,8 @@ run_test() local rcv_dmac=$(mac_get $rcv_if_name) local should_receive + setup_wait + tcpdump_start $rcv_if_name mc_route_prepare $send_if_name From ad17e7e92a7c52ce70bb764813fcf99464f96903 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 6 Nov 2025 10:14:21 +0800 Subject: [PATCH 0840/1075] net: fec: correct rx_bytes statistic for the case SHIFT16 is set Two additional bytes in front of each frame received into the RX FIFO if SHIFT16 is set, so we need to subtract the extra two bytes from pkt_len to correct the statistic of rx_bytes. Fixes: 3ac72b7b63d5 ("net: fec: align IP header in hardware") Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20251106021421.2096585-1-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1edcfaee6819..3222359ac15b 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1835,6 +1835,8 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) ndev->stats.rx_packets++; pkt_len = fec16_to_cpu(bdp->cbd_datlen); ndev->stats.rx_bytes += pkt_len; + if (fep->quirks & FEC_QUIRK_HAS_RACC) + ndev->stats.rx_bytes -= 2; index = fec_enet_get_bd_index(bdp, &rxq->bd); page = rxq->rx_skb_info[index].page; From 96a9178a29a6b84bb632ebeb4e84cf61191c73d5 Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Thu, 6 Nov 2025 10:06:37 +0100 Subject: [PATCH 0841/1075] net: phy: micrel: lan8814 fix reset of the QSGMII interface The lan8814 is a quad-phy and it is using QSGMII towards the MAC. The problem is that everytime when one of the ports is configured then the PCS is reseted for all the PHYs. Meaning that the other ports can loose traffic until the link is establish again. To fix this, do the reset one time for the entire PHY package. Fixes: ece19502834d ("net: phy: micrel: 1588 support for LAN8814 phy") Signed-off-by: Horatiu Vultur Reviewed-by: Andrew Lunn Reviewed-by: Divya Koppera Link: https://patch.msgid.link/20251106090637.2030625-1-horatiu.vultur@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/micrel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 6a1a424e3b30..01c87c9b7702 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -4380,12 +4380,6 @@ static int lan8814_config_init(struct phy_device *phydev) { struct kszphy_priv *lan8814 = phydev->priv; - /* Reset the PHY */ - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, - LAN8814_QSGMII_SOFT_RESET, - LAN8814_QSGMII_SOFT_RESET_BIT, - LAN8814_QSGMII_SOFT_RESET_BIT); - /* Disable ANEG with QSGMII PCS Host side */ lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, LAN8814_QSGMII_PCS1G_ANEG_CONFIG, @@ -4471,6 +4465,12 @@ static int lan8814_probe(struct phy_device *phydev) addr, sizeof(struct lan8814_shared_priv)); if (phy_package_init_once(phydev)) { + /* Reset the PHY */ + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, + LAN8814_QSGMII_SOFT_RESET, + LAN8814_QSGMII_SOFT_RESET_BIT, + LAN8814_QSGMII_SOFT_RESET_BIT); + err = lan8814_release_coma_mode(phydev); if (err) return err; From 411336159064c5e89e8b89d30c6855db4bb92814 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 8 Nov 2025 19:41:09 +1000 Subject: [PATCH 0842/1075] Revert "drm/nouveau: set DMA mask before creating the flush page" This reverts commit ebe755605082eddff80eafe0c50915b1366ee98f. Tested the latest kernel on my GB203 and this seems to break it somehow. Nov 09 04:16:14 bighp kernel: nouveau 0000:02:00.0: gsp: GSP-FMC boot failed (mbox: 0x0000000b) Nov 09 04:16:14 bighp kernel: nouveau 0000:02:00.0: gsp: init failed, -5 Nov 09 04:16:14 bighp kernel: nouveau 0000:02:00.0: init failed with -5 Nov 09 04:16:14 bighp kernel: nouveau: drm:00000000:00000080: init failed with -5 Nov 09 04:16:14 bighp kernel: nouveau 0000:02:00.0: drm: Device allocation failed: -5 Nov 09 04:16:14 bighp kernel: nouveau 0000:02:00.0: probe with driver nouveau failed with error -5 Not sure why, I went over the patch and thought it should have worked, but there must be some 32-bit problem maybe in the FMC boot path. Signed-off-by: Dave Airlie --- .../gpu/drm/nouveau/nvkm/engine/device/pci.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c index 7cc5a7499583..8f0261a0d618 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c @@ -1695,18 +1695,6 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, *pdevice = &pdev->device; pdev->pdev = pci_dev; - /* Set DMA mask based on capabilities reported by the MMU subdev. */ - if (pdev->device.mmu && !pdev->device.pci->agp.bridge) - bits = pdev->device.mmu->dma_bits; - else - bits = 32; - - ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); - if (ret && bits != 32) { - dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); - pdev->device.mmu->dma_bits = 32; - } - ret = nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev, pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE : pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ? @@ -1720,5 +1708,17 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg, if (ret) return ret; + /* Set DMA mask based on capabilities reported by the MMU subdev. */ + if (pdev->device.mmu && !pdev->device.pci->agp.bridge) + bits = pdev->device.mmu->dma_bits; + else + bits = 32; + + ret = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(bits)); + if (ret && bits != 32) { + dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + pdev->device.mmu->dma_bits = 32; + } + return 0; } From 3f9eacf4f0705876a5d6526d7d320ca91d7d7a16 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Oct 2025 12:27:05 +0000 Subject: [PATCH 0843/1075] KVM: arm64: Make all 32bit ID registers fully writable 32bit ID registers aren't getting much love these days, and are often missed in updates. One of these updates broke restoring a GICv2 guest on a GICv3 machine. Instead of performing a piecemeal fix, just bite the bullet and make all 32bit ID regs fully writable. KVM itself never relies on them for anything, and if the VMM wants to mess up the guest, so be it. Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest") Reported-by: Peter Maydell Cc: stable@vger.kernel.org Reviewed-by: Oliver Upton Link: https://patch.msgid.link/20251030122707.2033690-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 59 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e67eb39ddc11..ad82264c6cbe 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2595,19 +2595,23 @@ static bool bad_redir_trap(struct kvm_vcpu *vcpu, .val = 0, \ } -/* sys_reg_desc initialiser for known cpufeature ID registers */ -#define AA32_ID_SANITISED(name) { \ - ID_DESC(name), \ - .visibility = aa32_id_visibility, \ - .val = 0, \ -} - /* sys_reg_desc initialiser for writable ID registers */ #define ID_WRITABLE(name, mask) { \ ID_DESC(name), \ .val = mask, \ } +/* + * 32bit ID regs are fully writable when the guest is 32bit + * capable. Nothing in the KVM code should rely on 32bit features + * anyway, only 64bit, so let the VMM do its worse. + */ +#define AA32_ID_WRITABLE(name) { \ + ID_DESC(name), \ + .visibility = aa32_id_visibility, \ + .val = GENMASK(31, 0), \ +} + /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ #define ID_FILTERED(sysreg, name, mask) { \ ID_DESC(sysreg), \ @@ -3128,40 +3132,39 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 mappings of the AArch32 ID registers */ /* CRm=1 */ - AA32_ID_SANITISED(ID_PFR0_EL1), - AA32_ID_SANITISED(ID_PFR1_EL1), + AA32_ID_WRITABLE(ID_PFR0_EL1), + AA32_ID_WRITABLE(ID_PFR1_EL1), { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, .get_user = get_id_reg, .set_user = set_id_dfr0_el1, .visibility = aa32_id_visibility, .reset = read_sanitised_id_dfr0_el1, - .val = ID_DFR0_EL1_PerfMon_MASK | - ID_DFR0_EL1_CopDbg_MASK, }, + .val = GENMASK(31, 0) }, ID_HIDDEN(ID_AFR0_EL1), - AA32_ID_SANITISED(ID_MMFR0_EL1), - AA32_ID_SANITISED(ID_MMFR1_EL1), - AA32_ID_SANITISED(ID_MMFR2_EL1), - AA32_ID_SANITISED(ID_MMFR3_EL1), + AA32_ID_WRITABLE(ID_MMFR0_EL1), + AA32_ID_WRITABLE(ID_MMFR1_EL1), + AA32_ID_WRITABLE(ID_MMFR2_EL1), + AA32_ID_WRITABLE(ID_MMFR3_EL1), /* CRm=2 */ - AA32_ID_SANITISED(ID_ISAR0_EL1), - AA32_ID_SANITISED(ID_ISAR1_EL1), - AA32_ID_SANITISED(ID_ISAR2_EL1), - AA32_ID_SANITISED(ID_ISAR3_EL1), - AA32_ID_SANITISED(ID_ISAR4_EL1), - AA32_ID_SANITISED(ID_ISAR5_EL1), - AA32_ID_SANITISED(ID_MMFR4_EL1), - AA32_ID_SANITISED(ID_ISAR6_EL1), + AA32_ID_WRITABLE(ID_ISAR0_EL1), + AA32_ID_WRITABLE(ID_ISAR1_EL1), + AA32_ID_WRITABLE(ID_ISAR2_EL1), + AA32_ID_WRITABLE(ID_ISAR3_EL1), + AA32_ID_WRITABLE(ID_ISAR4_EL1), + AA32_ID_WRITABLE(ID_ISAR5_EL1), + AA32_ID_WRITABLE(ID_MMFR4_EL1), + AA32_ID_WRITABLE(ID_ISAR6_EL1), /* CRm=3 */ - AA32_ID_SANITISED(MVFR0_EL1), - AA32_ID_SANITISED(MVFR1_EL1), - AA32_ID_SANITISED(MVFR2_EL1), + AA32_ID_WRITABLE(MVFR0_EL1), + AA32_ID_WRITABLE(MVFR1_EL1), + AA32_ID_WRITABLE(MVFR2_EL1), ID_UNALLOCATED(3,3), - AA32_ID_SANITISED(ID_PFR2_EL1), + AA32_ID_WRITABLE(ID_PFR2_EL1), ID_HIDDEN(ID_DFR1_EL1), - AA32_ID_SANITISED(ID_MMFR5_EL1), + AA32_ID_WRITABLE(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ From 8a9866ff860052efc5f9766f3f87fae30c983156 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Oct 2025 12:27:06 +0000 Subject: [PATCH 0844/1075] KVM: arm64: Set ID_{AA64PFR0,PFR1}_EL1.GIC when GICv3 is configured Drive the idreg fields indicating the presence of GICv3 directly from the vgic code. This avoids having to do any sort of runtime clearing of the idreg. Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest") Reviewed-by: Oliver Upton Link: https://patch.msgid.link/20251030122707.2033690-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-init.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 1796b1a22a72..ca411cce4140 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -71,6 +71,7 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); int kvm_vgic_create(struct kvm *kvm, u32 type) { struct kvm_vcpu *vcpu; + u64 aa64pfr0, pfr1; unsigned long i; int ret; @@ -161,10 +162,19 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - else + } else { INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); + } + + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); if (type == KVM_DEV_TYPE_ARM_VGIC_V3) kvm->arch.vgic.nassgicap = system_supports_direct_sgis(); From 50e7cce81b9b2fbd6f0104c1698959d45ce3cf58 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 30 Oct 2025 12:27:07 +0000 Subject: [PATCH 0845/1075] KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip Now that the idreg's GIC field is in sync with the irqchip, limit the runtime clearing of these fields to the pathological case where we do not have an in-kernel GIC. While we're at it, use the existing API instead of open-coded accessors to access the ID regs. Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest") Reviewed-by: Oliver Upton Link: https://patch.msgid.link/20251030122707.2033690-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index ad82264c6cbe..8ae2bca81614 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -5609,11 +5609,13 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && - irqchip_in_kernel(kvm) && - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; + if (!irqchip_in_kernel(kvm)) { + u64 val; + + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); + val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); } if (vcpu_has_nv(vcpu)) { From 75360a9a338580990c1ee188d40a838c025bbd30 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 7 Nov 2025 10:48:46 -0800 Subject: [PATCH 0846/1075] KVM: arm64: vgic-v3: Reinstate IRQ lock ordering for LPI xarray Zenghui reports that running a KVM guest with an assigned device and lockdep enabled produces an unfriendly splat due to an inconsistent irq context when taking the lpi_xa's spinlock. This is no good as in rare cases the last reference to an LPI can get dropped after injection of a cached LPI translation. In this case, vgic_put_irq() will release the IRQ struct and take the lpi_xa's spinlock to erase it from the xarray. Reinstate the IRQ ordering and update the lockdep hint accordingly. Note that there is no irqsave equivalent of might_lock(), so just explictly grab and release the spinlock on lockdep kernels. Reported-by: Zenghui Yu Closes: https://lore.kernel.org/kvmarm/b4d7cb0f-f007-0b81-46d1-998b15cc14bc@huawei.com/ Fixes: 982f31bbb5b0 ("KVM: arm64: vgic-v3: Don't require IRQs be disabled for LPI xarray lock") Signed-off-by: Oliver Upton Link: https://patch.msgid.link/20251107184847.1784820-2-oupton@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-debug.c | 16 ++++++++++++---- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-its.c | 7 ++++--- arch/arm64/kvm/vgic/vgic.c | 23 +++++++++++++++-------- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index 4c1209261b65..bb92853d1fd3 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -64,29 +64,37 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) static int iter_mark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; int nr_lpis = 0; + xa_lock_irqsave(&dist->lpi_xa, flags); + xa_for_each(&dist->lpi_xa, intid, irq) { if (!vgic_try_get_irq_ref(irq)) continue; - xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); nr_lpis++; } + xa_unlock_irqrestore(&dist->lpi_xa, flags); + return nr_lpis; } static void iter_unmark_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long intid, flags; struct vgic_irq *irq; - unsigned long intid; xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { - xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_lock_irqsave(&dist->lpi_xa, flags); + __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + + /* vgic_put_irq() expects to be called outside of the xa_lock */ vgic_put_irq(kvm, irq); } } diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index ca411cce4140..da62edbc1205 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -53,7 +53,7 @@ void kvm_vgic_early_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - xa_init(&dist->lpi_xa); + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); } /* CREATION */ diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index ce3e3ed3f29f..f162206adb48 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -78,6 +78,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -88,7 +89,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (!irq) return ERR_PTR(-ENOMEM); - ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); if (ret) { kfree(irq); return ERR_PTR(ret); @@ -103,7 +104,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->target_vcpu = vcpu; irq->group = 1; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -125,7 +126,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, } out_unlock: - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); if (ret) return ERR_PTR(ret); diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 6dd5a10081e2..8d20c53faef0 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -28,7 +28,7 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->arch.config_lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_dist->lpi_xa.xa_lock + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled * vgic_cpu->ap_list_lock must be taken with IRQs disabled * vgic_irq->irq_lock must be taken with IRQs disabled * @@ -141,32 +141,39 @@ static __must_check bool vgic_put_irq_norelease(struct kvm *kvm, struct vgic_irq void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; - if (irq->intid >= VGIC_MIN_LPI) - might_lock(&dist->lpi_xa.xa_lock); + /* + * Normally the lock is only taken when the refcount drops to 0. + * Acquire/release it early on lockdep kernels to make locking issues + * in rare release paths a bit more obvious. + */ + if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { + guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); + } if (!__vgic_put_irq(kvm, irq)) return; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); vgic_release_lpi_locked(dist, irq); - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } static void vgic_release_deleted_lpis(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - unsigned long intid; + unsigned long flags, intid; struct vgic_irq *irq; - xa_lock(&dist->lpi_xa); + xa_lock_irqsave(&dist->lpi_xa, flags); xa_for_each(&dist->lpi_xa, intid, irq) { if (irq->pending_release) vgic_release_lpi_locked(dist, irq); } - xa_unlock(&dist->lpi_xa); + xa_unlock_irqrestore(&dist->lpi_xa, flags); } void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) From 66768669f27d98b45b20ed401cca913c387a9934 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Fri, 7 Nov 2025 10:48:47 -0800 Subject: [PATCH 0847/1075] KVM: arm64: vgic-v3: Release reserved slot outside of lpi_xa's lock xa_release() expects to be called outside of the xa_lock. Fix vgic_add_lpi() to drop the lock before calling and restructure to get rid of the goto label. Reported-by: Zenghui Yu Closes: https://lore.kernel.org/kvmarm/d0853e82-7d95-5025-7abf-c6f1e0cdf7b5@huawei.com/ Fixes: 481c9ee846d2 ("KVM: arm64: vgic-its: Get rid of the lpi_list_lock") Signed-off-by: Oliver Upton Link: https://patch.msgid.link/20251107184847.1784820-3-oupton@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/vgic/vgic-its.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index f162206adb48..3f1c4b10fed9 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -115,21 +115,18 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, /* Someone was faster with adding this LPI, lets use that. */ kfree(irq); irq = oldirq; - - goto out_unlock; + } else { + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); } - ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); + xa_unlock_irqrestore(&dist->lpi_xa, flags); + if (ret) { xa_release(&dist->lpi_xa, intid); kfree(irq); - } -out_unlock: - xa_unlock_irqrestore(&dist->lpi_xa, flags); - - if (ret) return ERR_PTR(ret); + } /* * We "cache" the configuration table entries in our struct vgic_irq's. From 4af235bf645516481a82227d82d1352b9788903a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 6 Nov 2025 17:28:25 -0800 Subject: [PATCH 0848/1075] MAINTAINERS: Switch myself to using kernel.org address I've been running into issues with the linux.dev email semi-periodically, switching to my kernel.org address while I go figure out a better home for my inbox. Signed-off-by: Oliver Upton Link: https://patch.msgid.link/20251107012830.1708225-1-oupton@kernel.org Signed-off-by: Marc Zyngier --- .mailmap | 3 ++- MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index d2edd256b19d..54dde6226079 100644 --- a/.mailmap +++ b/.mailmap @@ -603,7 +603,8 @@ Oleksij Rempel Oleksij Rempel Oliver Hartkopp Oliver Hartkopp -Oliver Upton +Oliver Upton +Oliver Upton Ondřej Jirman Oza Pawandeep Pali Rohár diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..234b50c2c10b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13642,7 +13642,7 @@ F: virt/kvm/* KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) M: Marc Zyngier -M: Oliver Upton +M: Oliver Upton R: Joey Gouly R: Suzuki K Poulose R: Zenghui Yu From 162f24cbb0f6ec596e7e9f3e91610d79dc805229 Mon Sep 17 00:00:00 2001 From: Yuta Hayama Date: Wed, 15 Oct 2025 12:07:05 +0900 Subject: [PATCH 0849/1075] rtc: rx8025: fix incorrect register reference This code is intended to operate on the CTRL1 register, but ctrl[1] is actually CTRL2. Correctly, ctrl[0] is CTRL1. Signed-off-by: Yuta Hayama Fixes: 71af91565052 ("rtc: rx8025: fix 12/24 hour mode detection on RX-8035") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/eae5f479-5d28-4a37-859d-d54794e7628c@lineo.co.jp Signed-off-by: Alexandre Belloni --- drivers/rtc/rtc-rx8025.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index aabe62c283a1..7e9f7cb90c28 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -316,7 +316,7 @@ static int rx8025_init_client(struct i2c_client *client) return hour_reg; rx8025->is_24 = (hour_reg & RX8035_BIT_HOUR_1224); } else { - rx8025->is_24 = (ctrl[1] & RX8025_BIT_CTRL1_1224); + rx8025->is_24 = (ctrl[0] & RX8025_BIT_CTRL1_1224); } out: return err; From 002621a4df3c166fab1427e8e502bc15acc26b13 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 7 Nov 2025 19:29:33 +0100 Subject: [PATCH 0850/1075] kbuild: Let kernel-doc.py use PYTHON3 override It is possible to force a specific version of python to be used when building the kernel by passing PYTHON3= on the make command line. However kernel-doc.py is currently called with python3 hard-coded and thus ignores this setting. Use $(PYTHON3) to run $(KERNELDOC) so that the desired version of python is used. Signed-off-by: Jean Delvare Reviewed-by: Nicolas Schier Reviewed-by: Mauro Carvalho Chehab Link: https://patch.msgid.link/20251107192933.2bfe9e57@endymion Signed-off-by: Nathan Chancellor --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/i915/Makefile | 2 +- include/drm/Makefile | 2 +- scripts/Makefile.build | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 4b2f7d794275..da2565e6de71 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -245,7 +245,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e58c0c158b3a..b91575380708 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -413,7 +413,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += kvmgt.o # # Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build ifdef CONFIG_DRM_I915_WERROR - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none -Werror $< + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none -Werror $< endif # header test diff --git a/include/drm/Makefile b/include/drm/Makefile index 1df6962556ef..48fae3f167c7 100644 --- a/include/drm/Makefile +++ b/include/drm/Makefile @@ -11,7 +11,7 @@ always-$(CONFIG_DRM_HEADER_TEST) += \ quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) cmd_hdrtest = \ $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ touch $@ $(obj)/%.hdrtest: $(src)/%.h FORCE diff --git a/scripts/Makefile.build b/scripts/Makefile.build index d0ee33a487be..52c08c4eb0b9 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -167,7 +167,7 @@ else ifeq ($(KBUILD_CHECKSRC),2) endif ifneq ($(KBUILD_EXTRA_WARN),) - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(KDOCFLAGS) \ + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(KDOCFLAGS) \ $(if $(findstring 2, $(KBUILD_EXTRA_WARN)), -Wall) \ $< endif From dc55b3c3f61246e483e50c85d8d5366f9567e188 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 8 Nov 2025 00:45:19 +0000 Subject: [PATCH 0851/1075] KVM: SVM: Mark VMCB_LBR dirty when MSR_IA32_DEBUGCTLMSR is updated The APM lists the DbgCtlMsr field as being tracked by the VMCB_LBR clean bit. Always clear the bit when MSR_IA32_DEBUGCTLMSR is updated. The history is complicated, it was correctly cleared for L1 before commit 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running"). At that point svm_set_msr() started to rely on svm_update_lbrv() to clear the bit, but when nested virtualization is enabled the latter does not always clear it even if MSR_IA32_DEBUGCTLMSR changed. Go back to clearing it directly in svm_set_msr(). Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running") Reported-by: Matteo Rizzo Reported-by: evn@google.com Co-developed-by: Jim Mattson Signed-off-by: Jim Mattson Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251108004524.1600006-2-yosry.ahmed@linux.dev Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 76055c0ba177..39538098002b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -3004,7 +3004,11 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; + if (svm_get_lbr_vmcb(svm)->save.dbgctl == data) + break; + svm_get_lbr_vmcb(svm)->save.dbgctl = data; + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; case MSR_VM_HSAVE_PA: From fbe5e5f030c22ae717ee422aaab0e00ea84fab5e Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 8 Nov 2025 00:45:20 +0000 Subject: [PATCH 0852/1075] KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv() svm_update_lbrv() is called when MSR_IA32_DEBUGCTLMSR is updated, and on nested transitions where LBRV is used. It checks whether LBRV enablement needs to be changed in the current VMCB, and if it does, it also recalculate intercepts to LBR MSRs. However, there are cases where intercepts need to be updated even when LBRV enablement doesn't. Example scenario: - L1 has MSR_IA32_DEBUGCTLMSR cleared. - L1 runs L2 without LBR_CTL_ENABLE (no LBRV). - L2 sets DEBUGCTLMSR_LBR in MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() sets LBR_CTL_ENABLE in VMCB02 and disables intercepts to LBR MSRs. - L2 exits to L1, svm_update_lbrv() is not called on this transition. - L1 clears MSR_IA32_DEBUGCTLMSR, svm_update_lbrv() finds that LBR_CTL_ENABLE is already cleared in VMCB01 and does nothing. - Intercepts remain disabled, L1 reads to LBR MSRs read the host MSRs. Fix it by always recalculating intercepts in svm_update_lbrv(). Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251108004524.1600006-3-yosry.ahmed@linux.dev Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 39538098002b..53201f13a43c 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -806,25 +806,29 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) vmcb_mark_dirty(to_vmcb, VMCB_LBR); } -void svm_enable_lbrv(struct kvm_vcpu *vcpu) +static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - svm_recalc_lbr_msr_intercepts(vcpu); /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ if (is_guest_mode(vcpu)) svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); } -static void svm_disable_lbrv(struct kvm_vcpu *vcpu) +void svm_enable_lbrv(struct kvm_vcpu *vcpu) +{ + __svm_enable_lbrv(vcpu); + svm_recalc_lbr_msr_intercepts(vcpu); +} + +static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; - svm_recalc_lbr_msr_intercepts(vcpu); /* * Move the LBR msrs back to the vmcb01 to avoid copying them @@ -853,13 +857,18 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu) (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); - if (enable_lbrv == current_enable_lbrv) - return; + if (enable_lbrv && !current_enable_lbrv) + __svm_enable_lbrv(vcpu); + else if (!enable_lbrv && current_enable_lbrv) + __svm_disable_lbrv(vcpu); - if (enable_lbrv) - svm_enable_lbrv(vcpu); - else - svm_disable_lbrv(vcpu); + /* + * During nested transitions, it is possible that the current VMCB has + * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). + * In this case, even though LBR_CTL does not need an update, intercepts + * do, so always recalculate the intercepts here. + */ + svm_recalc_lbr_msr_intercepts(vcpu); } void disable_nmi_singlestep(struct vcpu_svm *svm) From 8a4821412cf2c1429fffa07c012dd150f2edf78c Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Sat, 8 Nov 2025 00:45:21 +0000 Subject: [PATCH 0853/1075] KVM: nSVM: Fix and simplify LBR virtualization handling with nested The current scheme for handling LBRV when nested is used is very complicated, especially when L1 does not enable LBRV (i.e. does not set LBR_CTL_ENABLE_MASK). To avoid copying LBRs between VMCB01 and VMCB02 on every nested transition, the current implementation switches between using VMCB01 or VMCB02 as the source of truth for the LBRs while L2 is running. If L2 enables LBR, VMCB02 is used as the source of truth. When L2 disables LBR, the LBRs are copied to VMCB01 and VMCB01 is used as the source of truth. This introduces significant complexity, and incorrect behavior in some cases. For example, on a nested #VMEXIT, the LBRs are only copied from VMCB02 to VMCB01 if LBRV is enabled in VMCB01. This is because L2's writes to MSR_IA32_DEBUGCTLMSR to enable LBR are intercepted and propagated to VMCB01 instead of VMCB02. However, LBRV is only enabled in VMCB02 when L2 is running. This means that if L2 enables LBR and exits to L1, the LBRs will not be propagated from VMCB02 to VMCB01, because LBRV is disabled in VMCB01. There is no meaningful difference in CPUID rate in L2 when copying LBRs on every nested transition vs. the current approach, so do the simple and correct thing and always copy LBRs between VMCB01 and VMCB02 on nested transitions (when LBRV is disabled by L1). Drop the conditional LBRs copying in __svm_{enable/disable}_lbrv() as it is now unnecessary. VMCB02 becomes the only source of truth for LBRs when L2 is running, regardless of LBRV being enabled by L1, drop svm_get_lbr_vmcb() and use svm->vmcb directly in its place. Fixes: 1d5a1b5860ed ("KVM: x86: nSVM: correctly virtualize LBR msrs when L2 is running") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251108004524.1600006-4-yosry.ahmed@linux.dev Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 20 ++++++----------- arch/x86/kvm/svm/svm.c | 46 +++++++++------------------------------ 2 files changed, 17 insertions(+), 49 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a6443feab252..da6e80b3ac35 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -677,11 +677,10 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 */ svm_copy_lbrs(vmcb02, vmcb12); vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; - svm_update_lbrv(&svm->vcpu); - - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + } else { svm_copy_lbrs(vmcb02, vmcb01); } + svm_update_lbrv(&svm->vcpu); } static inline bool is_evtinj_soft(u32 evtinj) @@ -833,11 +832,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, svm->soft_int_next_rip = vmcb12_rip; } - vmcb02->control.virt_ext = vmcb01->control.virt_ext & - LBR_CTL_ENABLE_MASK; - if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) - vmcb02->control.virt_ext |= - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ if (!nested_vmcb_needs_vls_intercept(svm)) vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; @@ -1189,13 +1184,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) svm_copy_lbrs(vmcb12, vmcb02); - svm_update_lbrv(vcpu); - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { + else svm_copy_lbrs(vmcb01, vmcb02); - svm_update_lbrv(vcpu); - } + + svm_update_lbrv(vcpu); if (vnmi) { if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 53201f13a43c..10c21e4c5406 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -808,13 +808,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; - - /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; } void svm_enable_lbrv(struct kvm_vcpu *vcpu) @@ -825,35 +819,15 @@ void svm_enable_lbrv(struct kvm_vcpu *vcpu) static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); - svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; - - /* - * Move the LBR msrs back to the vmcb01 to avoid copying them - * on nested guest entries. - */ - if (is_guest_mode(vcpu)) - svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); -} - -static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) -{ - /* - * If LBR virtualization is disabled, the LBR MSRs are always kept in - * vmcb01. If LBR virtualization is enabled and L1 is running VMs of - * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. - */ - return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb : - svm->vmcb01.ptr; + to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; } void svm_update_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || + bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) || (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); @@ -2733,19 +2707,19 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = svm->tsc_aux; break; case MSR_IA32_DEBUGCTLMSR: - msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; + msr_info->data = svm->vmcb->save.dbgctl; break; case MSR_IA32_LASTBRANCHFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; + msr_info->data = svm->vmcb->save.br_from; break; case MSR_IA32_LASTBRANCHTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; + msr_info->data = svm->vmcb->save.br_to; break; case MSR_IA32_LASTINTFROMIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; + msr_info->data = svm->vmcb->save.last_excp_from; break; case MSR_IA32_LASTINTTOIP: - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; + msr_info->data = svm->vmcb->save.last_excp_to; break; case MSR_VM_HSAVE_PA: msr_info->data = svm->nested.hsave_msr; @@ -3013,10 +2987,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) if (data & DEBUGCTL_RESERVED_BITS) return 1; - if (svm_get_lbr_vmcb(svm)->save.dbgctl == data) + if (svm->vmcb->save.dbgctl == data) break; - svm_get_lbr_vmcb(svm)->save.dbgctl = data; + svm->vmcb->save.dbgctl = data; vmcb_mark_dirty(svm->vmcb, VMCB_LBR); svm_update_lbrv(vcpu); break; From 7a39c723b7472b8aaa2e0a67d2b6c7cf1c45cafb Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Sat, 8 Nov 2025 22:23:25 +0800 Subject: [PATCH 0854/1075] ALSA: hda/tas2781: Add new quirk for HP new projects Add new vendor_id and subsystem_id in quirk for HP new projects. Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20251108142325.2563-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index 4aec5067c59d..a9698bf26887 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6694,6 +6694,15 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8e60, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e61, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x103c, 0x8e62, "HP Trekker ", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x103c, 0x8ed5, "HP Merino13X", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed6, "HP Merino13", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed7, "HP Merino14", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed8, "HP Merino16", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8ed9, "HP Merino14W", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8eda, "HP Merino16W", ALC245_FIXUP_TAS2781_SPI_2), + SND_PCI_QUIRK(0x103c, 0x8f40, "HP Lampas14", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f41, "HP Lampas16", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f42, "HP LampasW14", ALC287_FIXUP_TAS2781_I2C), SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From e9a6fb0bcdd7609be6969112f3fbfcce3b1d4a7c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 9 Nov 2025 15:10:19 -0800 Subject: [PATCH 0855/1075] Linux 6.18-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 088565edc911..fb4389aa5d5f 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* From 79280191c2fd7f24899bbd640003b5389d3c109c Mon Sep 17 00:00:00 2001 From: Henrique Carvalho Date: Fri, 7 Nov 2025 18:59:53 -0300 Subject: [PATCH 0856/1075] smb: client: fix cifs_pick_channel when channel needs reconnect cifs_pick_channel iterates candidate channels using cur. The reconnect-state test mistakenly used a different variable. This checked the wrong slot and would cause us to skip a healthy channel and to dispatch on one that needs reconnect, occasionally failing operations when a channel was down. Fix by replacing for the correct variable. Fixes: fc43a8ac396d ("cifs: cifs_pick_channel should try selecting active channels") Cc: stable@vger.kernel.org Reviewed-by: Shyam Prasad N Signed-off-by: Henrique Carvalho Signed-off-by: Steve French --- fs/smb/client/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 051cd9dbba13..915cedde5d66 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -830,7 +830,7 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) if (!server || server->terminate) continue; - if (CIFS_CHAN_NEEDS_RECONNECT(ses, i)) + if (CIFS_CHAN_NEEDS_RECONNECT(ses, cur)) continue; /* From e8c73eb7db0a498cd4b22d2819e6ab1a6f506bd6 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 7 Nov 2025 22:01:39 +0800 Subject: [PATCH 0857/1075] cifs: client: fix memory leak in smb3_fs_context_parse_param The user calls fsconfig twice, but when the program exits, free() only frees ctx->source for the second fsconfig, not the first. Regarding fc->source, there is no code in the fs context related to its memory reclamation. To fix this memory leak, release the source memory corresponding to ctx or fc before each parsing. syzbot reported: BUG: memory leak unreferenced object 0xffff888128afa360 (size 96): backtrace (crc 79c9c7ba): kstrdup+0x3c/0x80 mm/util.c:84 smb3_fs_context_parse_param+0x229b/0x36c0 fs/smb/client/fs_context.c:1444 BUG: memory leak unreferenced object 0xffff888112c7d900 (size 96): backtrace (crc 79c9c7ba): smb3_fs_context_fullpath+0x70/0x1b0 fs/smb/client/fs_context.c:629 smb3_fs_context_parse_param+0x2266/0x36c0 fs/smb/client/fs_context.c:1438 Reported-by: syzbot+72afd4c236e6bc3f4bac@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=72afd4c236e6bc3f4bac Cc: stable@vger.kernel.org Reviewed-by: Paulo Alcantara (Red Hat) Signed-off-by: Edward Adam Davis Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index e60927b2a7c8..c2d5bb23040c 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1435,12 +1435,14 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "Unknown error parsing devname\n"); goto cifs_parse_mount_err; } + kfree(ctx->source); ctx->source = smb3_fs_context_fullpath(ctx, '/'); if (IS_ERR(ctx->source)) { ctx->source = NULL; cifs_errorf(fc, "OOM when copying UNC string\n"); goto cifs_parse_mount_err; } + kfree(fc->source); fc->source = kstrdup(ctx->source, GFP_KERNEL); if (fc->source == NULL) { cifs_errorf(fc, "OOM when copying UNC string\n"); From e904d81ad1c04394e1cda4610de799a006cc141c Mon Sep 17 00:00:00 2001 From: Joshua Rogers Date: Fri, 7 Nov 2025 00:15:37 +0800 Subject: [PATCH 0858/1075] smb: server: rdma: avoid unmapping posted recv on accept failure smb_direct_prepare_negotiation() posts a recv and then, if smb_direct_accept_client() fails, calls put_recvmsg() on the same buffer. That unmaps and recycles a buffer that is still posted on the QP., which can lead to device DMA into unmapped or reused memory. Track whether the recv was posted and only return it if it was never posted. If accept fails after a post, leave it for teardown to drain and complete safely. Signed-off-by: Joshua Rogers Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 5d3b48e77012..3d8d8cb456c1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -1883,6 +1883,7 @@ static int smb_direct_accept_client(struct smbdirect_socket *sc) static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) { struct smbdirect_recv_io *recvmsg; + bool recv_posted = false; int ret; WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); @@ -1899,6 +1900,7 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) pr_err("Can't post recv: %d\n", ret); goto out_err; } + recv_posted = true; ret = smb_direct_accept_client(sc); if (ret) { @@ -1908,7 +1910,14 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) return 0; out_err: - put_recvmsg(sc, recvmsg); + /* + * If the recv was never posted, return it to the free list. + * If it was posted, leave it alone so disconnect teardown can + * drain the QP and complete it (flush) and the completion path + * will unmap it exactly once. + */ + if (!recv_posted) + put_recvmsg(sc, recvmsg); return ret; } From 98a5fd31cbf72d46bf18e50b3ab0ce86d5f319a9 Mon Sep 17 00:00:00 2001 From: Joshua Rogers Date: Sat, 8 Nov 2025 22:59:23 +0800 Subject: [PATCH 0859/1075] ksmbd: close accepted socket when per-IP limit rejects connection When the per-IP connection limit is exceeded in ksmbd_kthread_fn(), the code sets ret = -EAGAIN and continues the accept loop without closing the just-accepted socket. That leaks one socket per rejected attempt from a single IP and enables a trivial remote DoS. Release client_sk before continuing. This bug was found with ZeroPath. Cc: stable@vger.kernel.org Signed-off-by: Joshua Rogers Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_tcp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 7a1e3dcc2cde..d2e391c29464 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -290,8 +290,11 @@ static int ksmbd_kthread_fn(void *p) } } up_read(&conn_list_lock); - if (ret == -EAGAIN) + if (ret == -EAGAIN) { + /* Per-IP limit hit: release the just-accepted socket. */ + sock_release(client_sk); continue; + } skip_max_ip_conns_limit: if (server_conf.max_connections && From fe4b3a34e9a9654d98d274218dac0270779db0ae Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 9 Nov 2025 16:01:50 +0800 Subject: [PATCH 0860/1075] rust: Add -fno-isolate-erroneous-paths-dereference to bindgen_skip_c_flags It's used to work around an objtool issue since commit abb2a5572264 ("LoongArch: Add cflag -fno-isolate-erroneous-paths-dereference"), but it's then passed to bindgen and cause an error because Clang does not have this option. Fixes: abb2a5572264 ("LoongArch: Add cflag -fno-isolate-erroneous-paths-dereference") Acked-by: Miguel Ojeda Tested-by: Mingcong Bai Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 3e545c1a0ff4..7842ad0a4ea7 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -298,7 +298,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ -fzero-init-padding-bits=% -mno-fdpic \ - --param=% --param asan-% + --param=% --param asan-% -fno-isolate-erroneous-paths-dereference # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu From f28abb9f96e65a28d46885afd6b70cfc4d5df5a2 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 9 Nov 2025 16:02:00 +0800 Subject: [PATCH 0861/1075] LoongArch: Clarify 3 MSG interrupt features LoongArch's MSG interrupt features are used across multiple subsystems. Clarify these features to avoid misuse, existing users will be adjusted if necessary. MSGINT: Infrastructure, means the CPU core supports message interupts. Indicated by CPUCFG1.MSGINT. AVECINT: AVEC interrupt controller based on MSGINT, means the CPU chip supports direct message interrupts. Indicated by IOCSR.FEATURES.DMSI. REDIRECTINT: REDIRECT interrupt controller based on MSGINT and AVECINT, means the CPU chip supports redirect message interrupts. Indicated by IOCSR.FEATURES.RMSI. For example: Loongson-3A5000/3C5000 doesn't support MSGINT/AVECINT/REDIRECTINT; Loongson-3A6000 supports MSGINT but doesn't support AVECINT/REDIRECTINT; Loongson-3C6000 supports MSGINT/AVECINT/REDIRECTINT. Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/cpu-features.h | 2 ++ arch/loongarch/include/asm/cpu.h | 6 +++++- arch/loongarch/include/asm/loongarch.h | 1 + arch/loongarch/kernel/cpu-probe.c | 4 ++++ 4 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/cpu-features.h b/arch/loongarch/include/asm/cpu-features.h index fc83bb32f9f0..bd5f0457ad21 100644 --- a/arch/loongarch/include/asm/cpu-features.h +++ b/arch/loongarch/include/asm/cpu-features.h @@ -67,6 +67,8 @@ #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) #define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) +#define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) +#define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) #endif /* __ASM_CPU_FEATURES_H */ diff --git a/arch/loongarch/include/asm/cpu.h b/arch/loongarch/include/asm/cpu.h index dfb982fe8701..d4cd4041bee7 100644 --- a/arch/loongarch/include/asm/cpu.h +++ b/arch/loongarch/include/asm/cpu.h @@ -101,7 +101,9 @@ enum cpu_type_enum { #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ -#define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ +#define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ +#define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) @@ -132,6 +134,8 @@ enum cpu_type_enum { #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) #define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) +#define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) +#define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) #endif /* _ASM_CPU_H */ diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 09dfd7eb406e..5b36fa57015f 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1137,6 +1137,7 @@ #define IOCSRF_FLATMODE BIT_ULL(10) #define IOCSRF_VM BIT_ULL(11) #define IOCSRF_AVEC BIT_ULL(15) +#define IOCSRF_REDIRECT BIT_ULL(16) #define LOONGARCH_IOCSR_VENDOR 0x10 diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index cbfce2872d71..6f943d1391ff 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -157,6 +157,8 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options |= LOONGARCH_CPU_TLB; if (config & CPUCFG1_IOCSR) c->options |= LOONGARCH_CPU_IOCSR; + if (config & CPUCFG1_MSGINT) + c->options |= LOONGARCH_CPU_MSGINT; if (config & CPUCFG1_UAL) { c->options |= LOONGARCH_CPU_UAL; elf_hwcap |= HWCAP_LOONGARCH_UAL; @@ -331,6 +333,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_loongarch *c, unsigned int c->options |= LOONGARCH_CPU_EIODECODE; if (config & IOCSRF_AVEC) c->options |= LOONGARCH_CPU_AVECINT; + if (config & IOCSRF_REDIRECT) + c->options |= LOONGARCH_CPU_REDIRECTINT; if (config & IOCSRF_VM) c->options |= LOONGARCH_CPU_HYPERVISOR; } From 4e67526840fc55917581b90f6a4b65849a616dd8 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 9 Nov 2025 16:02:00 +0800 Subject: [PATCH 0862/1075] LoongArch: Use physical addresses for CSR_MERRENTRY/CSR_TLBRENTRY Now we use virtual addresses to fill CSR_MERRENTRY/CSR_TLBRENTRY, but hardware hope physical addresses. Now it works well because the high bits are ignored above PA_BITS (48 bits), but explicitly use physical addresses can avoid potential bugs. So fix it. Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/traps.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 3d9be6ca7ec5..da5926fead4a 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -1131,8 +1131,8 @@ static void configure_exception_vector(void) tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; csr_write64(eentry, LOONGARCH_CSR_EENTRY); - csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); - csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); + csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY); + csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY); } void per_cpu_trap_init(int cpu) From 43a9e6a10bdde32445ad2725f568e08a94e51dc9 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 9 Nov 2025 16:02:00 +0800 Subject: [PATCH 0863/1075] LoongArch: Consolidate early_ioremap()/ioremap_prot() 1. Use phys_addr_t instead of u64, which can work for both 32/64 bits. 2. Check whether the input physical address is above TO_PHYS_MASK (and return NULL if yes) for the DMW version. Note: In theory early_ioremap() also need the TO_PHYS_MASK checking, but the UEFI BIOS pass some DMW virtual addresses. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/io.h | 5 ++++- arch/loongarch/mm/ioremap.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index eaff72b38dc8..0130185e0349 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,7 +14,7 @@ #include #include -extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); +extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); #define early_memremap early_ioremap @@ -25,6 +25,9 @@ extern void __init early_iounmap(void __iomem *addr, unsigned long size); static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, pgprot_t prot) { + if (offset > TO_PHYS_MASK) + return NULL; + switch (pgprot_val(prot) & _CACHE_MASK) { case _CACHE_CC: return (void __iomem *)(unsigned long)(CACHE_BASE + offset); diff --git a/arch/loongarch/mm/ioremap.c b/arch/loongarch/mm/ioremap.c index df949a3d0f34..27c336959fe8 100644 --- a/arch/loongarch/mm/ioremap.c +++ b/arch/loongarch/mm/ioremap.c @@ -6,7 +6,7 @@ #include #include -void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) +void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size) { return ((void __iomem *)TO_CACHE(phys_addr)); } From ce5ad03e459ecb3b4993a8f311fd4f2fb3e6ef81 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0864/1075] LoongArch: Consolidate max_pfn & max_low_pfn calculation Now there 5 places which calculate max_pfn & max_low_pfn: 1. in fdt_setup() for FDT systems; 2. in memblock_init() for ACPI systems; 3. in init_numa_memory() for NUMA systems; 4. in arch_mem_init() to recalculate for "mem=" cmdline; 5. in paging_init() to recalculate for NUMA systems. Since memblock_init() is called both for ACPI and FDT systems, move the calculation out of the for_each_efi_memory_desc() loop can eliminate the first case. The last case is very questionable (may be derived from the MIPS/Loongson code) and breaks the "mem=" cmdline, so should be removed. And then the NUMA version of paging_init() can be also eliminated. After consolidation there are 3 places of calculation: 1. in memblock_init() for both ACPI and FDT systems; 2. in init_numa_memory() to recalculate for NUMA systems; 3. in arch_mem_init() to recalculate for the "mem=" cmdline. For all cases the calculation is: max_pfn = PFN_DOWN(memblock_end_of_DRAM()); max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/mem.c | 7 +++---- arch/loongarch/kernel/numa.c | 23 ++--------------------- arch/loongarch/kernel/setup.c | 5 ++--- arch/loongarch/mm/init.c | 2 -- 4 files changed, 7 insertions(+), 30 deletions(-) diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c index aed901c57fb4..8ab1ffedc52c 100644 --- a/arch/loongarch/kernel/mem.c +++ b/arch/loongarch/kernel/mem.c @@ -13,7 +13,7 @@ void __init memblock_init(void) { u32 mem_type; - u64 mem_start, mem_end, mem_size; + u64 mem_start, mem_size; efi_memory_desc_t *md; /* Parse memory information */ @@ -21,7 +21,6 @@ void __init memblock_init(void) mem_type = md->type; mem_start = md->phys_addr; mem_size = md->num_pages << EFI_PAGE_SHIFT; - mem_end = mem_start + mem_size; switch (mem_type) { case EFI_LOADER_CODE: @@ -31,8 +30,6 @@ void __init memblock_init(void) case EFI_PERSISTENT_MEMORY: case EFI_CONVENTIONAL_MEMORY: memblock_add(mem_start, mem_size); - if (max_low_pfn < (mem_end >> PAGE_SHIFT)) - max_low_pfn = mem_end >> PAGE_SHIFT; break; case EFI_PAL_CODE: case EFI_UNUSABLE_MEMORY: @@ -49,6 +46,8 @@ void __init memblock_init(void) } } + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); memblock_set_current_limit(PFN_PHYS(max_low_pfn)); /* Reserve the first 2MB */ diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c index d6e73e8f9c0b..ab9c660526a3 100644 --- a/arch/loongarch/kernel/numa.c +++ b/arch/loongarch/kernel/numa.c @@ -272,7 +272,8 @@ int __init init_numa_memory(void) node_mem_init(node); node_set_online(node); } - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); setup_nr_node_ids(); loongson_sysconf.nr_nodes = nr_node_ids; @@ -283,26 +284,6 @@ int __init init_numa_memory(void) #endif -void __init paging_init(void) -{ - unsigned int node; - unsigned long zones_size[MAX_NR_ZONES] = {0, }; - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } -#ifdef CONFIG_ZONE_DMA32 - zones_size[ZONE_DMA32] = MAX_DMA32_PFN; -#endif - zones_size[ZONE_NORMAL] = max_low_pfn; - free_area_init(zones_size); -} - int pcibus_to_node(struct pci_bus *bus) { return dev_to_node(&bus->dev); diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 69c17d162fff..25a87378e48e 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -294,8 +294,6 @@ static void __init fdt_setup(void) early_init_dt_scan(fdt_pointer, __pa(fdt_pointer)); early_init_fdt_reserve_self(); - - max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); #endif } @@ -390,7 +388,8 @@ static void __init check_kernel_sections_mem(void) static void __init arch_mem_init(char **cmdline_p) { /* Recalculate max_low_pfn for "mem=xxx" */ - max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); if (usermem) pr_info("User-defined physical RAM map overwrite\n"); diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index c3e4586a7975..6bfd4b8dad1b 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -60,7 +60,6 @@ int __ref page_is_ram(unsigned long pfn) return memblock_is_memory(addr) && !memblock_is_reserved(addr); } -#ifndef CONFIG_NUMA void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -72,7 +71,6 @@ void __init paging_init(void) free_area_init(max_zone_pfns); } -#endif /* !CONFIG_NUMA */ void __ref free_initmem(void) { From a073d637c8cfbfbab39b7272226a3fbf3b887580 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0865/1075] LoongArch: Let {pte,pmd}_modify() record the status of _PAGE_DIRTY Now if the PTE/PMD is dirty with _PAGE_DIRTY but without _PAGE_MODIFIED, after {pte,pmd}_modify() we lose _PAGE_DIRTY, then {pte,pmd}_dirty() return false and lead to data loss. This can happen in certain scenarios such as HW PTW doesn't set _PAGE_MODIFIED automatically, so here we need _PAGE_MODIFIED to record the dirty status (_PAGE_DIRTY). The new modification involves checking whether the original PTE/PMD has the _PAGE_DIRTY flag. If it exists, the _PAGE_MODIFIED bit is also set, ensuring that the {pte,pmd}_dirty() interface can always return accurate information. Cc: stable@vger.kernel.org Co-developed-by: Liupu Wang Signed-off-by: Liupu Wang Signed-off-by: Tianyang Zhang --- arch/loongarch/include/asm/pgtable.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index bd128696e96d..03fb60432fde 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -424,6 +424,9 @@ static inline unsigned long pte_accessible(struct mm_struct *mm, pte_t a) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { + if (pte_val(pte) & _PAGE_DIRTY) + pte_val(pte) |= _PAGE_MODIFIED; + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); } @@ -547,9 +550,11 @@ static inline struct page *pmd_page(pmd_t pmd) static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); - return pmd; + if (pmd_val(pmd) & _PAGE_DIRTY) + pmd_val(pmd) |= _PAGE_MODIFIED; + + return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) | + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK)); } static inline pmd_t pmd_mkinvalid(pmd_t pmd) From 17f838512ae50203ae2e3ce9b9f2689cc67beaa3 Mon Sep 17 00:00:00 2001 From: "Vishal Moola (Oracle)" Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0866/1075] LoongArch: Remove __GFP_HIGHMEM masking in pud_alloc_one() Remove the unnecessary __GFP_HIGHMEM masking in pud_alloc_one(), which was introduced with commit 382739797f79ec2 ("loongarch: convert various functions to use ptdescs"). GFP_KERNEL doesn't contain __GFP_HIGHMEM. Signed-off-by: Vishal Moola (Oracle) Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/pgalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h index 1c63a9d9a6d3..08dcc698ec18 100644 --- a/arch/loongarch/include/asm/pgalloc.h +++ b/arch/loongarch/include/asm/pgalloc.h @@ -88,7 +88,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { pud_t *pud; - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); if (!ptdesc) return NULL; From 4c8a7c9827726f6e987b7a04af8ef58f1c7fe8d3 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0867/1075] LoongArch: Refine the init_hw_perf_events() function (1) Use the existing CPUCFG6_PMNUM_SHIFT macro definition instead of the magic value 4 to get the PMU number. (2) Detect the value of PMU bits via CPUCFG instruction according to the ISA manual instead of hard-coded as 64, because the value may be different for various micro-architectures. Link: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html#_cpucfg Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 1 + arch/loongarch/kernel/perf_event.c | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 5b36fa57015f..3de03cb864b2 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -128,6 +128,7 @@ #define CPUCFG6_PMNUM GENMASK(7, 4) #define CPUCFG6_PMNUM_SHIFT 4 #define CPUCFG6_PMBITS GENMASK(13, 8) +#define CPUCFG6_PMBITS_SHIFT 8 #define CPUCFG6_UPM BIT(14) #define LOONGARCH_CPUCFG16 0x10 diff --git a/arch/loongarch/kernel/perf_event.c b/arch/loongarch/kernel/perf_event.c index 8ad098703488..9d257c8519c9 100644 --- a/arch/loongarch/kernel/perf_event.c +++ b/arch/loongarch/kernel/perf_event.c @@ -845,13 +845,14 @@ static const struct loongarch_perf_event *loongarch_pmu_map_raw_event(u64 config static int __init init_hw_perf_events(void) { - int counters; + int bits, counters; if (!cpu_has_pmp) return -ENODEV; pr_info("Performance counters: "); - counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; + bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; loongarch_pmu.num_counters = counters; loongarch_pmu.max_period = (1ULL << 63) - 1; @@ -867,7 +868,7 @@ static int __init init_hw_perf_events(void) on_each_cpu(reset_counters, NULL, 1); pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", - loongarch_pmu.name, counters, 64); + loongarch_pmu.name, counters, bits); perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); From eeeeaafa62ea0cd4b86390f657dc0aea73bff4f5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0868/1075] LoongArch: Use correct accessor to read FWPC/MWPC CSR.FWPC and CSR.MWPC are 32bit registers, so use csr_read32() rather than csr_read64() to read the values of FWPC/MWPC. Cc: stable@vger.kernel.org Fixes: edffa33c7bb5a73 ("LoongArch: Add hardware breakpoints/watchpoints support") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/hw_breakpoint.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/include/asm/hw_breakpoint.h b/arch/loongarch/include/asm/hw_breakpoint.h index 13b2462f3d8c..5faa97a87a9e 100644 --- a/arch/loongarch/include/asm/hw_breakpoint.h +++ b/arch/loongarch/include/asm/hw_breakpoint.h @@ -134,13 +134,13 @@ static inline void hw_breakpoint_thread_switch(struct task_struct *next) /* Determine number of BRP registers available. */ static inline int get_num_brps(void) { - return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; + return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; } /* Determine number of WRP registers available. */ static inline int get_num_wrps(void) { - return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; + return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; } #endif /* __KERNEL__ */ From df16b8956cae970027f4be4a1500272201e2d5c1 Mon Sep 17 00:00:00 2001 From: Youling Tang Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0869/1075] LoongArch: kexec: Initialize the kexec_buf structure The kexec_buf structure was previously declared without initialization. commit bf454ec31add ("kexec_file: allow to place kexec_buf randomly") added a field that is always read but not consistently populated by all architectures. This un-initialized field will contain garbage. This is also triggering a UBSAN warning when the uninitialized data is accessed: ------------[ cut here ]------------ UBSAN: invalid-load in ./include/linux/kexec.h:210:10 load of value 252 is not a valid value for type '_Bool' Zero-initializing kexec_buf at declaration ensures all fields are cleanly set, preventing future instances of uninitialized memory being used. Fixes: bf454ec31add ("kexec_file: allow to place kexec_buf randomly") Link: https://lore.kernel.org/r/20250827-kbuf_all-v1-2-1df9882bb01a@debian.org Signed-off-by: Youling Tang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kexec_efi.c | 2 +- arch/loongarch/kernel/kexec_elf.c | 2 +- arch/loongarch/kernel/machine_kexec_file.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/loongarch/kernel/kexec_efi.c b/arch/loongarch/kernel/kexec_efi.c index 45121b914f8f..5ee78ebb1546 100644 --- a/arch/loongarch/kernel/kexec_efi.c +++ b/arch/loongarch/kernel/kexec_efi.c @@ -42,7 +42,7 @@ static void *efi_kexec_load(struct kimage *image, { int ret; unsigned long text_offset, kernel_segment_number; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_segment *kernel_segment; struct loongarch_image_header *h; diff --git a/arch/loongarch/kernel/kexec_elf.c b/arch/loongarch/kernel/kexec_elf.c index 97b2f049801a..1b6b64744c7f 100644 --- a/arch/loongarch/kernel/kexec_elf.c +++ b/arch/loongarch/kernel/kexec_elf.c @@ -59,7 +59,7 @@ static void *elf_kexec_load(struct kimage *image, int ret; unsigned long text_offset, kernel_segment_number; struct elfhdr ehdr; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; struct kexec_elf_info elf_info; struct kexec_segment *kernel_segment; diff --git a/arch/loongarch/kernel/machine_kexec_file.c b/arch/loongarch/kernel/machine_kexec_file.c index dda236b51a88..fb57026f5f25 100644 --- a/arch/loongarch/kernel/machine_kexec_file.c +++ b/arch/loongarch/kernel/machine_kexec_file.c @@ -143,7 +143,7 @@ int load_other_segments(struct kimage *image, unsigned long initrd_load_addr = 0; unsigned long orig_segments = image->nr_segments; char *modified_cmdline = NULL; - struct kexec_buf kbuf; + struct kexec_buf kbuf = {}; kbuf.image = image; /* Don't allocate anything below the kernel */ From 62cda5e54f7c5e773911b458dd4d10ee8c91b60b Mon Sep 17 00:00:00 2001 From: Qiang Ma Date: Sun, 9 Nov 2025 16:02:01 +0800 Subject: [PATCH 0870/1075] LoongArch: kexec: Print out debugging message if required When specifying '-d' for kexec_file_load interface, loaded locations of kernel/initrd/cmdline etc can be printed out to help debug. Commit eb7622d908a0 ("kexec_file, riscv: print out debugging message if required") fixes the same issue on RISC-V. So, remove kexec_image_info() because the content has been printed out in generic code. And on Loongson-3A5000, the printed messages look like below: kexec_file: kernel: 00000000d9aad283 kernel_size: 0x2e77f30 kexec_file(EFI): No LoongArch PE image header. kexec_file: Loaded initrd at 0x80000000 bufsz=0x1637cd0 memsz=0x1638000 kexec_file(ELF): Loaded kernel at 0x9c20000 bufsz=0x27f1800 memsz=0x2950000 kexec_file: nr_segments = 2 kexec_file: segment[0]: buf=0x00000000cc3e6c33 bufsz=0x27f1800 mem=0x9c20000 memsz=0x2950000 kexec_file: segment[1]: buf=0x00000000bb75a541 bufsz=0x1637cd0 mem=0x80000000 memsz=0x1638000 kexec_file: kexec_file_load: type:0, start:0xb15d000 head:0x18db60002 flags:0x8 Signed-off-by: Qiang Ma Signed-off-by: Huacai Chen --- arch/loongarch/kernel/machine_kexec.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c index e4b2bbc47e62..2d64b7c81e5e 100644 --- a/arch/loongarch/kernel/machine_kexec.c +++ b/arch/loongarch/kernel/machine_kexec.c @@ -39,34 +39,12 @@ static unsigned long systable_ptr; static unsigned long start_addr; static unsigned long first_ind_entry; -static void kexec_image_info(const struct kimage *kimage) -{ - unsigned long i; - - pr_debug("kexec kimage info:\n"); - pr_debug("\ttype: %d\n", kimage->type); - pr_debug("\tstart: %lx\n", kimage->start); - pr_debug("\thead: %lx\n", kimage->head); - pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); - - for (i = 0; i < kimage->nr_segments; i++) { - pr_debug("\t segment[%lu]: %016lx - %016lx", i, - kimage->segment[i].mem, - kimage->segment[i].mem + kimage->segment[i].memsz); - pr_debug("\t\t0x%lx bytes, %lu pages\n", - (unsigned long)kimage->segment[i].memsz, - (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); - } -} - int machine_kexec_prepare(struct kimage *kimage) { int i; char *bootloader = "kexec"; void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; - kexec_image_info(kimage); - kimage->arch.efi_boot = fw_arg0; kimage->arch.systable_ptr = fw_arg2; From 37e9d1a91382661c2d1f656b54c5d22dfe7a8606 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 9 Nov 2025 16:02:09 +0800 Subject: [PATCH 0871/1075] LoongArch: KVM: Set page with write attribute if dirty track disabled With secondary MMU page table, if there is a read page fault, the page's write attribute will not set even if it is writable from master MMU page table. This logic only works if dirty tracking is enabled, so page table should be set with _PAGE_WRITE if dirty tracking is disabled. It reduces extra page fault on secondary MMU page table if a VM finishes migration, when the master MMU page table is ready and the secondary MMU page is fresh. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 7c8143e79c12..a7fa458e3360 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -857,7 +857,7 @@ static int kvm_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, bool write) if (writeable) { prot_bits = kvm_pte_mkwriteable(prot_bits); - if (write) + if (write || !kvm_slot_dirty_track_enabled(memslot)) prot_bits = kvm_pte_mkdirty(prot_bits); } From d3c9515e4f9d10ccb113adb4809db5cc31e7ef65 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 9 Nov 2025 16:02:09 +0800 Subject: [PATCH 0872/1075] LoongArch: KVM: Add delay until timer interrupt injected When timer is fired in oneshot mode, CSR.TVAL will stop with value -1 rather than 0. However when the register CSR.TVAL is restored, it will continue to count down rather than stop there. Now the method is to write 0 to CSR.TVAL, wait to count down for 1 cycle at least, which is 10ns with a timer freq 100MHz, and then retore timer interrupt status. Here add 2 cycles delay to assure that timer interrupt is injected. With this patch, timer selftest case passes to run always. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 32dc213374be..29c2aaba63c3 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -95,6 +96,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) * and set CSR TVAL with -1 */ write_gcsr_timertick(0); + __delay(2); /* Wait cycles until timer interrupt injected */ /* * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear From 5001bcf86edf2de02f025a0f789bcac37fa040e6 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 9 Nov 2025 16:02:09 +0800 Subject: [PATCH 0873/1075] LoongArch: KVM: Restore guest PMU if it is enabled On LoongArch system, guest PMU hardware is shared by guest and host but PMU interrupt is separated. PMU is pass-through to VM, and there is PMU context switch when exit to host and return to guest. There is optimiation to check whether PMU is enabled by guest. If not, it is not necessary to return to guest. However, if it is enabled, PMU context for guest need switch on. Now KVM_REQ_PMU notification is set on vCPU context switch, but it is missing if there is no vCPU context switch while PMU is used by guest VM, so fix it. Cc: Fixes: f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 30e3b089a596..b3995ff4b17e 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -132,6 +132,9 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when * exiting the guest, so that the next time trap into the guest. * We don't need to deal with PMU CSRs contexts. + * + * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU + * before entering guest VM */ val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); @@ -139,6 +142,8 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); if (!(val & KVM_PMU_EVENT_ENABLED)) vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; + else + kvm_make_request(KVM_REQ_PMU, vcpu); kvm_restore_host_pmu(vcpu); } From 11f340ece403e71aa2b643a2562a58ed3ac12e2c Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 9 Nov 2025 16:02:09 +0800 Subject: [PATCH 0874/1075] LoongArch: KVM: Skip PMU checking on vCPU context switch PMU hardware about VM is switched on VM exit to host rather than vCPU context sched off, PMU is checked and restored on return to VM. It is not necessary to check PMU on vCPU context sched on callback, since the request is made on the VM exit entry or VM PMU CSR access abort routine already. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index b3995ff4b17e..1245a6b35896 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -148,12 +148,6 @@ static void kvm_lose_pmu(struct kvm_vcpu *vcpu) kvm_restore_host_pmu(vcpu); } -static void kvm_restore_pmu(struct kvm_vcpu *vcpu) -{ - if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) - kvm_make_request(KVM_REQ_PMU, vcpu); -} - static void kvm_check_pmu(struct kvm_vcpu *vcpu) { if (kvm_check_request(KVM_REQ_PMU, vcpu)) { @@ -304,7 +298,10 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { - kvm_lose_pmu(vcpu); + if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) { + kvm_lose_pmu(vcpu); + kvm_make_request(KVM_REQ_PMU, vcpu); + } /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); @@ -1609,9 +1606,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_restore_timer(vcpu); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); - /* Restore hardware PMU CSRs */ - kvm_restore_pmu(vcpu); - /* Don't bother restoring registers multiple times unless necessary */ if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE) return 0; From 237e74bfa261fb0cf75bd08c9be0c5094018ee20 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Sun, 9 Nov 2025 16:02:09 +0800 Subject: [PATCH 0875/1075] LoongArch: KVM: Fix max supported vCPUs set with EIOINTC VM fails to boot with 256 vCPUs, the detailed command is qemu-system-loongarch64 -smp 256 and there is an error reported as follows: KVM_LOONGARCH_EXTIOI_INIT_NUM_CPU failed: Invalid argument There is typo issue in function kvm_eiointc_ctrl_access() when set max supported vCPUs. Cc: stable@vger.kernel.org Fixes: 47256c4c8b1b ("LoongArch: KVM: Avoid copy_*_user() with lock hold in kvm_eiointc_ctrl_access()") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/eiointc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kvm/intc/eiointc.c b/arch/loongarch/kvm/intc/eiointc.c index c32333695381..a1cc116b4dac 100644 --- a/arch/loongarch/kvm/intc/eiointc.c +++ b/arch/loongarch/kvm/intc/eiointc.c @@ -439,7 +439,7 @@ static int kvm_eiointc_ctrl_access(struct kvm_device *dev, spin_lock_irqsave(&s->lock, flags); switch (type) { case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: - if (val >= EIOINTC_ROUTE_MAX_VCPUS) + if (val > EIOINTC_ROUTE_MAX_VCPUS) ret = -EINVAL; else s->num_cpu = val; From 77008e1b2ef73249bceb078a321a3ff6bc087afb Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Thu, 16 Oct 2025 21:36:30 -0400 Subject: [PATCH 0876/1075] mm/huge_memory: do not change split_huge_page*() target order silently Page cache folios from a file system that support large block size (LBS) can have minimal folio order greater than 0, thus a high order folio might not be able to be split down to order-0. Commit e220917fa507 ("mm: split a folio in minimum folio order chunks") bumps the target order of split_huge_page*() to the minimum allowed order when splitting a LBS folio. This causes confusion for some split_huge_page*() callers like memory failure handling code, since they expect after-split folios all have order-0 when split succeeds but in reality get min_order_for_split() order folios and give warnings. Fix it by failing a split if the folio cannot be split to the target order. Rename try_folio_split() to try_folio_split_to_order() to reflect the added new_order parameter. Remove its unused list parameter. [The test poisons LBS folios, which cannot be split to order-0 folios, and also tries to poison all memory. The non split LBS folios take more memory than the test anticipated, leading to OOM. The patch fixed the kernel warning and the test needs some change to avoid OOM.] Link: https://lkml.kernel.org/r/20251017013630.139907-1-ziy@nvidia.com Fixes: e220917fa507 ("mm: split a folio in minimum folio order chunks") Signed-off-by: Zi Yan Reported-by: syzbot+e6367ea2fdab6ed46056@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68d2c943.a70a0220.1b52b.02b3.GAE@google.com/ Reviewed-by: Luis Chamberlain Reviewed-by: Pankaj Raghav Reviewed-by: Wei Yang Acked-by: David Hildenbrand Reviewed-by: Lorenzo Stoakes Reviewed-by: Miaohe Lin Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jane Chu Cc: Lance Yang Cc: Liam Howlett Cc: Mariano Pache Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: Ryan Roberts Cc: Christian Brauner Cc: Signed-off-by: Andrew Morton --- include/linux/huge_mm.h | 55 +++++++++++++++++------------------------ mm/huge_memory.c | 9 +------ mm/truncate.c | 6 +++-- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f327d62fc985..71ac78b9f834 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -376,45 +376,30 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, int folio_split(struct folio *folio, unsigned int new_order, struct page *page, struct list_head *list); /* - * try_folio_split - try to split a @folio at @page using non uniform split. + * try_folio_split_to_order - try to split a @folio at @page to @new_order using + * non uniform split. * @folio: folio to be split - * @page: split to order-0 at the given page - * @list: store the after-split folios + * @page: split to @new_order at the given page + * @new_order: the target split order * - * Try to split a @folio at @page using non uniform split to order-0, if - * non uniform split is not supported, fall back to uniform split. + * Try to split a @folio at @page using non uniform split to @new_order, if + * non uniform split is not supported, fall back to uniform split. After-split + * folios are put back to LRU list. Use min_order_for_split() to get the lower + * bound of @new_order. * * Return: 0: split is successful, otherwise split failed. */ -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - if (!non_uniform_split_supported(folio, 0, false)) - return split_huge_page_to_list_to_order(&folio->page, list, - ret); - return folio_split(folio, ret, page, list); + if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) + return split_huge_page_to_list_to_order(&folio->page, NULL, + new_order); + return folio_split(folio, new_order, page, NULL); } static inline int split_huge_page(struct page *page) { - struct folio *folio = page_folio(page); - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - /* - * split_huge_page() locks the page before splitting and - * expects the same page that has been split to be locked when - * returned. split_folio(page_folio(page)) cannot be used here - * because it converts the page to folio and passes the head - * page to be split. - */ - return split_huge_page_to_list_to_order(page, NULL, ret); + return split_huge_page_to_list_to_order(page, NULL, 0); } void deferred_split_folio(struct folio *folio, bool partially_mapped); @@ -597,14 +582,20 @@ static inline int split_huge_page(struct page *page) return -EINVAL; } +static inline int min_order_for_split(struct folio *folio) +{ + VM_WARN_ON_ONCE_FOLIO(1, folio); + return -EINVAL; +} + static inline int split_folio_to_list(struct folio *folio, struct list_head *list) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; } -static inline int try_folio_split(struct folio *folio, struct page *page, - struct list_head *list) +static inline int try_folio_split_to_order(struct folio *folio, + struct page *page, unsigned int new_order) { VM_WARN_ON_ONCE_FOLIO(1, folio); return -EINVAL; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1d1b74950332..feac4aef7dfb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3653,8 +3653,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order, min_order = mapping_min_folio_order(folio->mapping); if (new_order < min_order) { - VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u", - min_order); ret = -EINVAL; goto out; } @@ -3986,12 +3984,7 @@ int min_order_for_split(struct folio *folio) int split_folio_to_list(struct folio *folio, struct list_head *list) { - int ret = min_order_for_split(folio); - - if (ret < 0) - return ret; - - return split_huge_page_to_list_to_order(&folio->page, list, ret); + return split_huge_page_to_list_to_order(&folio->page, list, 0); } /* diff --git a/mm/truncate.c b/mm/truncate.c index 91eb92a5ce4f..9210cf808f5c 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -194,6 +194,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) size_t size = folio_size(folio); unsigned int offset, length; struct page *split_at, *split_at2; + unsigned int min_order; if (pos < start) offset = start - pos; @@ -223,8 +224,9 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!folio_test_large(folio)) return true; + min_order = mapping_min_folio_order(folio->mapping); split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - if (!try_folio_split(folio, split_at, NULL)) { + if (!try_folio_split_to_order(folio, split_at, min_order)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste @@ -254,7 +256,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) - try_folio_split(folio2, split_at2, NULL); + try_folio_split_to_order(folio2, split_at2, min_order); folio_unlock(folio2); out: From e38f65d317df1fd2dcafe614d9c537475ecf9992 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 20 Oct 2025 20:08:50 -0400 Subject: [PATCH 0877/1075] kho: warn and fail on metadata or preserved memory in scratch area Patch series "KHO: kfence + KHO memory corruption fix", v3. This series fixes a memory corruption bug in KHO that occurs when KFENCE is enabled. The root cause is that KHO metadata, allocated via kzalloc(), can be randomly serviced by kfence_alloc(). When a kernel boots via KHO, the early memblock allocator is restricted to a "scratch area". This forces the KFENCE pool to be allocated within this scratch area, creating a conflict. If KHO metadata is subsequently placed in this pool, it gets corrupted during the next kexec operation. Google is using KHO and have had obscure crashes due to this memory corruption, with stacks all over the place. I would prefer this fix to be properly backported to stable so we can also automatically consume it once we switch to the upstream KHO. Patch 1/3 introduces a debug-only feature (CONFIG_KEXEC_HANDOVER_DEBUG) that adds checks to detect and fail any operation that attempts to place KHO metadata or preserved memory within the scratch area. This serves as a validation and diagnostic tool to confirm the problem without affecting production builds. Patch 2/3 Increases bitmap to PAGE_SIZE, so buddy allocator can be used. Patch 3/3 Provides the fix by modifying KHO to allocate its metadata directly from the buddy allocator instead of slab. This bypasses the KFENCE interception entirely. This patch (of 3): It is invalid for KHO metadata or preserved memory regions to be located within the KHO scratch area, as this area is overwritten when the next kernel is loaded, and used early in boot by the next kernel. This can lead to memory corruption. Add checks to kho_preserve_* and KHO's internal metadata allocators (xa_load_or_alloc, new_chunk) to verify that the physical address of the memory does not overlap with any defined scratch region. If an overlap is detected, the operation will fail and a WARN_ON is triggered. To avoid performance overhead in production kernels, these checks are enabled only when CONFIG_KEXEC_HANDOVER_DEBUG is selected. [rppt@kernel.org: fix KEXEC_HANDOVER_DEBUG Kconfig dependency] Link: https://lkml.kernel.org/r/aQHUyyFtiNZhx8jo@kernel.org [pasha.tatashin@soleen.com: build fix] Link: https://lkml.kernel.org/r/CA+CK2bBnorfsTymKtv4rKvqGBHs=y=MjEMMRg_tE-RME6n-zUw@mail.gmail.com Link: https://lkml.kernel.org/r/20251021000852.2924827-1-pasha.tatashin@soleen.com Link: https://lkml.kernel.org/r/20251021000852.2924827-2-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Signed-off-by: Mike Rapoport Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Christian Brauner Cc: David Matlack Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Randy Dunlap Cc: Samiullah Khawaja Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- kernel/Kconfig.kexec | 9 +++++ kernel/Makefile | 1 + kernel/kexec_handover.c | 57 +++++++++++++++++++++----------- kernel/kexec_handover_debug.c | 25 ++++++++++++++ kernel/kexec_handover_internal.h | 20 +++++++++++ 5 files changed, 93 insertions(+), 19 deletions(-) create mode 100644 kernel/kexec_handover_debug.c create mode 100644 kernel/kexec_handover_internal.h diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 422270d64820..54e581072617 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -109,6 +109,15 @@ config KEXEC_HANDOVER to keep data or state alive across the kexec. For this to work, both source and target kernels need to have this option enabled. +config KEXEC_HANDOVER_DEBUG + bool "Enable Kexec Handover debug checks" + depends on KEXEC_HANDOVER + help + This option enables extra sanity checks for the Kexec Handover + subsystem. Since, KHO performance is crucial in live update + scenarios and the extra code might be adding overhead it is + only optionally enabled. + config CRASH_DUMP bool "kernel crash dumps" default ARCH_DEFAULT_CRASH_DUMP diff --git a/kernel/Makefile b/kernel/Makefile index df3dd8291bb6..9fe722305c9b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o +obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o obj-$(CONFIG_COMPAT) += compat.o obj-$(CONFIG_CGROUPS) += cgroup/ diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 76f0940fb485..0bc9001e532a 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) "KHO: " fmt +#include #include #include #include @@ -22,6 +23,7 @@ #include +#include "kexec_handover_internal.h" /* * KHO is tightly coupled with mm init and needs access to some of mm * internal APIs. @@ -133,26 +135,26 @@ static struct kho_out kho_out = { static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) { - void *elm, *res; + void *res = xa_load(xa, index); - elm = xa_load(xa, index); - if (elm) - return elm; + if (res) + return res; + + void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL); - elm = kzalloc(sz, GFP_KERNEL); if (!elm) return ERR_PTR(-ENOMEM); + if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz))) + return ERR_PTR(-EINVAL); + res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); if (xa_is_err(res)) - res = ERR_PTR(xa_err(res)); - - if (res) { - kfree(elm); + return ERR_PTR(xa_err(res)); + else if (res) return res; - } - return elm; + return no_free_ptr(elm); } static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, @@ -345,15 +347,19 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE); static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, unsigned long order) { - struct khoser_mem_chunk *chunk; + struct khoser_mem_chunk *chunk __free(kfree) = NULL; chunk = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!chunk) - return NULL; + return ERR_PTR(-ENOMEM); + + if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE))) + return ERR_PTR(-EINVAL); + chunk->hdr.order = order; if (cur_chunk) KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk); - return chunk; + return no_free_ptr(chunk); } static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk) @@ -374,14 +380,17 @@ static int kho_mem_serialize(struct kho_serialization *ser) struct khoser_mem_chunk *chunk = NULL; struct kho_mem_phys *physxa; unsigned long order; + int err = -ENOMEM; xa_for_each(&ser->track.orders, order, physxa) { struct kho_mem_phys_bits *bits; unsigned long phys; chunk = new_chunk(chunk, order); - if (!chunk) + if (IS_ERR(chunk)) { + err = PTR_ERR(chunk); goto err_free; + } if (!first_chunk) first_chunk = chunk; @@ -391,8 +400,10 @@ static int kho_mem_serialize(struct kho_serialization *ser) if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) { chunk = new_chunk(chunk, order); - if (!chunk) + if (IS_ERR(chunk)) { + err = PTR_ERR(chunk); goto err_free; + } } elm = &chunk->bitmaps[chunk->hdr.num_elms]; @@ -409,7 +420,7 @@ static int kho_mem_serialize(struct kho_serialization *ser) err_free: kho_mem_ser_free(first_chunk); - return -ENOMEM; + return err; } static void __init deserialize_bitmap(unsigned int order, @@ -465,8 +476,8 @@ static void __init kho_mem_deserialize(const void *fdt) * area for early allocations that happen before page allocator is * initialized. */ -static struct kho_scratch *kho_scratch; -static unsigned int kho_scratch_cnt; +struct kho_scratch *kho_scratch; +unsigned int kho_scratch_cnt; /* * The scratch areas are scaled by default as percent of memory allocated from @@ -752,6 +763,9 @@ int kho_preserve_folio(struct folio *folio) const unsigned int order = folio_order(folio); struct kho_mem_track *track = &kho_out.ser.track; + if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order))) + return -EINVAL; + return __kho_preserve_order(track, pfn, order); } EXPORT_SYMBOL_GPL(kho_preserve_folio); @@ -775,6 +789,11 @@ int kho_preserve_pages(struct page *page, unsigned int nr_pages) unsigned long failed_pfn = 0; int err = 0; + if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT, + nr_pages << PAGE_SHIFT))) { + return -EINVAL; + } + while (pfn < end_pfn) { const unsigned int order = min(count_trailing_zeros(pfn), ilog2(end_pfn - pfn)); diff --git a/kernel/kexec_handover_debug.c b/kernel/kexec_handover_debug.c new file mode 100644 index 000000000000..6efb696f5426 --- /dev/null +++ b/kernel/kexec_handover_debug.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kexec_handover_debug.c - kexec handover optional debug functionality + * Copyright (C) 2025 Google LLC, Pasha Tatashin + */ + +#define pr_fmt(fmt) "KHO: " fmt + +#include "kexec_handover_internal.h" + +bool kho_scratch_overlap(phys_addr_t phys, size_t size) +{ + phys_addr_t scratch_start, scratch_end; + unsigned int i; + + for (i = 0; i < kho_scratch_cnt; i++) { + scratch_start = kho_scratch[i].addr; + scratch_end = kho_scratch[i].addr + kho_scratch[i].size; + + if (phys < scratch_end && (phys + size) > scratch_start) + return true; + } + + return false; +} diff --git a/kernel/kexec_handover_internal.h b/kernel/kexec_handover_internal.h new file mode 100644 index 000000000000..3c3c7148ceed --- /dev/null +++ b/kernel/kexec_handover_internal.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H +#define LINUX_KEXEC_HANDOVER_INTERNAL_H + +#include +#include + +extern struct kho_scratch *kho_scratch; +extern unsigned int kho_scratch_cnt; + +#ifdef CONFIG_KEXEC_HANDOVER_DEBUG +bool kho_scratch_overlap(phys_addr_t phys, size_t size); +#else +static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size) +{ + return false; +} +#endif /* CONFIG_KEXEC_HANDOVER_DEBUG */ + +#endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */ From a2fff99f92dae9c0eaf0d75de3def70ec68dad92 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 20 Oct 2025 20:08:51 -0400 Subject: [PATCH 0878/1075] kho: increase metadata bitmap size to PAGE_SIZE KHO memory preservation metadata is preserved in 512 byte chunks which requires their allocation from slab allocator. Slabs are not safe to be used with KHO because of kfence, and because partial slabs may lead leaks to the next kernel. Change the size to be PAGE_SIZE. The kfence specifically may cause memory corruption, where it randomly provides slab objects that can be within the scratch area. The reason for that is that kfence allocates its objects prior to KHO scratch is marked as CMA region. While this change could potentially increase metadata overhead on systems with sparsely preserved memory, this is being mitigated by ongoing work to reduce sparseness during preservation via 1G guest pages. Furthermore, this change aligns with future work on a stateless KHO, which will also use page-sized bitmaps for its radix tree metadata. Link: https://lkml.kernel.org/r/20251021000852.2924827-3-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Christian Brauner Cc: David Matlack Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Randy Dunlap Cc: Samiullah Khawaja Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 0bc9001e532a..9217d2fdd2d3 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -69,10 +69,10 @@ early_param("kho", kho_parse_enable); * Keep track of memory that is to be preserved across KHO. * * The serializing side uses two levels of xarrays to manage chunks of per-order - * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a - * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations - * each bitmap will cover 16M of address space. Thus, for 16G of memory at most - * 512K of bitmap memory will be needed for order 0. + * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order + * of a 8TB system would fit inside a single 4096 byte bitmap. For order 0 + * allocations each bitmap will cover 128M of address space. Thus, for 16G of + * memory at most 512K of bitmap memory will be needed for order 0. * * This approach is fully incremental, as the serialization progresses folios * can continue be aggregated to the tracker. The final step, immediately prior @@ -80,12 +80,14 @@ early_param("kho", kho_parse_enable); * successor kernel to parse. */ -#define PRESERVE_BITS (512 * 8) +#define PRESERVE_BITS (PAGE_SIZE * 8) struct kho_mem_phys_bits { DECLARE_BITMAP(preserve, PRESERVE_BITS); }; +static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE); + struct kho_mem_phys { /* * Points to kho_mem_phys_bits, a sparse bitmap array. Each bit is sized @@ -133,19 +135,19 @@ static struct kho_out kho_out = { .finalized = false, }; -static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) +static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) { void *res = xa_load(xa, index); if (res) return res; - void *elm __free(kfree) = kzalloc(sz, GFP_KERNEL); + void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL); if (!elm) return ERR_PTR(-ENOMEM); - if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), sz))) + if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE))) return ERR_PTR(-EINVAL); res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); @@ -218,8 +220,7 @@ static int __kho_preserve_order(struct kho_mem_track *track, unsigned long pfn, } } - bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, - sizeof(*bits)); + bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS); if (IS_ERR(bits)) return PTR_ERR(bits); From fa759cd75bce5489eed34596daa53f721849a86f Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Mon, 20 Oct 2025 20:08:52 -0400 Subject: [PATCH 0879/1075] kho: allocate metadata directly from the buddy allocator KHO allocates metadata for its preserved memory map using the slab allocator via kzalloc(). This metadata is temporary and is used by the next kernel during early boot to find preserved memory. A problem arises when KFENCE is enabled. kzalloc() calls can be randomly intercepted by kfence_alloc(), which services the allocation from a dedicated KFENCE memory pool. This pool is allocated early in boot via memblock. When booting via KHO, the memblock allocator is restricted to a "scratch area", forcing the KFENCE pool to be allocated within it. This creates a conflict, as the scratch area is expected to be ephemeral and overwriteable by a subsequent kexec. If KHO metadata is placed in this KFENCE pool, it leads to memory corruption when the next kernel is loaded. To fix this, modify KHO to allocate its metadata directly from the buddy allocator instead of slab. Link: https://lkml.kernel.org/r/20251021000852.2924827-4-pasha.tatashin@soleen.com Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pasha Tatashin Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: David Matlack Cc: Alexander Graf Cc: Christian Brauner Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Randy Dunlap Cc: Samiullah Khawaja Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton --- include/linux/gfp.h | 3 +++ kernel/kexec_handover.c | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0ceb4e09306c..623bee335383 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -7,6 +7,7 @@ #include #include #include +#include #include struct vm_area_struct; @@ -463,4 +464,6 @@ static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, /* This should be paired with folio_put() rather than free_contig_range(). */ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) +DEFINE_FREE(free_page, void *, free_page((unsigned long)_T)) + #endif /* __LINUX_GFP_H */ diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 9217d2fdd2d3..2a8c20c238a8 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -142,7 +142,7 @@ static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) if (res) return res; - void *elm __free(kfree) = kzalloc(PAGE_SIZE, GFP_KERNEL); + void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL); if (!elm) return ERR_PTR(-ENOMEM); @@ -348,9 +348,9 @@ static_assert(sizeof(struct khoser_mem_chunk) == PAGE_SIZE); static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, unsigned long order) { - struct khoser_mem_chunk *chunk __free(kfree) = NULL; + struct khoser_mem_chunk *chunk __free(free_page) = NULL; - chunk = kzalloc(PAGE_SIZE, GFP_KERNEL); + chunk = (void *)get_zeroed_page(GFP_KERNEL); if (!chunk) return ERR_PTR(-ENOMEM); From fc745ff317566ec299e16346ebb9eacc8fe5b9d2 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Wed, 22 Oct 2025 18:57:19 +0800 Subject: [PATCH 0880/1075] mm/shmem: fix THP allocation and fallback loop The order check and fallback loop is updating the index value on every loop. This will cause the index to be wrongly aligned by a larger value while the loop shrinks the order. This may result in inserting and returning a folio of the wrong index and cause data corruption with some userspace workloads [1]. [kasong@tencent.com: introduce a temporary variable to improve code] Link: https://lkml.kernel.org/r/20251023065913.36925-1-ryncsn@gmail.com Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20251022105719.18321-1-ryncsn@gmail.com Link: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/ [1] Fixes: e7a2ab7b3bb5 ("mm: shmem: add mTHP support for anonymous shmem") Closes: https://lore.kernel.org/linux-mm/CAMgjq7DqgAmj25nDUwwu1U2cSGSn8n4-Hqpgottedy0S6YYeUw@mail.gmail.com/ Signed-off-by: Kairui Song Acked-by: David Hildenbrand Acked-by: Zi Yan Reviewed-by: Baolin Wang Reviewed-by: Barry Song Reviewed-by: Lorenzo Stoakes Cc: Dev Jain Cc: Hugh Dickins Cc: Liam Howlett Cc: Matthew Wilcox (Oracle) Cc: Nico Pache Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- mm/shmem.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b9081b817d28..58701d14dd96 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1882,6 +1882,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, struct shmem_inode_info *info = SHMEM_I(inode); unsigned long suitable_orders = 0; struct folio *folio = NULL; + pgoff_t aligned_index; long pages; int error, order; @@ -1895,10 +1896,12 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf, order = highest_order(suitable_orders); while (suitable_orders) { pages = 1UL << order; - index = round_down(index, pages); - folio = shmem_alloc_folio(gfp, order, info, index); - if (folio) + aligned_index = round_down(index, pages); + folio = shmem_alloc_folio(gfp, order, info, aligned_index); + if (folio) { + index = aligned_index; goto allocated; + } if (pages == HPAGE_PMD_NR) count_vm_event(THP_FILE_FALLBACK); From 7e76b75e5ab3339bebab3a4738226cd9b27d8c42 Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov Date: Tue, 30 Sep 2025 13:56:01 +0200 Subject: [PATCH 0881/1075] mm/kmsan: fix kmsan kmalloc hook when no stack depots are allocated yet If no stack depot is allocated yet, due to masking out __GFP_RECLAIM flags kmsan called from kmalloc cannot allocate stack depot. kmsan fails to record origin and report issues. This may result in KMSAN failing to report issues. Reusing flags from kmalloc without modifying them should be safe for kmsan. For example, such chain of calls is possible: test_uninit_kmalloc -> kmalloc -> __kmalloc_cache_noprof -> slab_alloc_node -> slab_post_alloc_hook -> kmsan_slab_alloc -> kmsan_internal_poison_memory. Only when it is called in a context without flags present should __GFP_RECLAIM flags be masked. With this change all kmsan tests start working reliably. Eric reported: : Yes, KMSAN seems to be at least partially broken currently. Besides the : fact that the kmsan KUnit test is currently failing (which I reported at : https://lore.kernel.org/r/20250911175145.GA1376@sol), I've confirmed that : the poly1305 KUnit test causes a KMSAN warning with Aleksei's patch : applied but does not cause a warning without it. The warning did get : reached via syzbot somehow : (https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.1761026343.git.xiaopei01@kylinos.cn/), : so KMSAN must still work in some cases. But it didn't work for me. Link: https://lkml.kernel.org/r/20250930115600.709776-2-aleksei.nikiforov@linux.ibm.com Link: https://lkml.kernel.org/r/20251022030213.GA35717@sol Fixes: 97769a53f117 ("mm, bpf: Introduce try_alloc_pages() for opportunistic page allocation") Signed-off-by: Aleksei Nikiforov Reviewed-by: Alexander Potapenko Tested-by: Eric Biggers Cc: Alexei Starovoitov Cc: Dmitriy Vyukov Cc: Ilya Leoshkevich Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- mm/kmsan/core.c | 3 --- mm/kmsan/hooks.c | 6 ++++-- mm/kmsan/shadow.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/mm/kmsan/core.c b/mm/kmsan/core.c index 8bca7fece47f..35ceaa8adb41 100644 --- a/mm/kmsan/core.c +++ b/mm/kmsan/core.c @@ -72,9 +72,6 @@ depot_stack_handle_t kmsan_save_stack_with_flags(gfp_t flags, nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); - /* Don't sleep. */ - flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM); - handle = stack_depot_save(entries, nr_entries, flags); return stack_depot_set_extra_bits(handle, extra); } diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c index 2cee59d89c80..8f22d1f22981 100644 --- a/mm/kmsan/hooks.c +++ b/mm/kmsan/hooks.c @@ -84,7 +84,8 @@ void kmsan_slab_free(struct kmem_cache *s, void *object) if (s->ctor) return; kmsan_enter_runtime(); - kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL, + kmsan_internal_poison_memory(object, s->object_size, + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } @@ -114,7 +115,8 @@ void kmsan_kfree_large(const void *ptr) kmsan_enter_runtime(); page = virt_to_head_page((void *)ptr); KMSAN_WARN_ON(ptr != page_address(page)); - kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL, + kmsan_internal_poison_memory((void *)ptr, page_size(page), + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c index 54f3c3c962f0..55fdea199aaf 100644 --- a/mm/kmsan/shadow.c +++ b/mm/kmsan/shadow.c @@ -208,7 +208,7 @@ void kmsan_free_page(struct page *page, unsigned int order) return; kmsan_enter_runtime(); kmsan_internal_poison_memory(page_address(page), page_size(page), - GFP_KERNEL, + GFP_KERNEL & ~(__GFP_RECLAIM), KMSAN_POISON_CHECK | KMSAN_POISON_FREE); kmsan_leave_runtime(); } From f5548c318d6520d4fa3c5ed6003eeb710763cbc5 Mon Sep 17 00:00:00 2001 From: Pedro Demarchi Gomes Date: Wed, 22 Oct 2025 12:30:59 -0300 Subject: [PATCH 0882/1075] ksm: use range-walk function to jump over holes in scan_get_next_rmap_item Currently, scan_get_next_rmap_item() walks every page address in a VMA to locate mergeable pages. This becomes highly inefficient when scanning large virtual memory areas that contain mostly unmapped regions, causing ksmd to use large amount of cpu without deduplicating much pages. This patch replaces the per-address lookup with a range walk using walk_page_range(). The range walker allows KSM to skip over entire unmapped holes in a VMA, avoiding unnecessary lookups. This problem was previously discussed in [1]. Consider the following test program which creates a 32 TiB mapping in the virtual address space but only populates a single page: #include #include #include /* 32 TiB */ const size_t size = 32ul * 1024 * 1024 * 1024 * 1024; int main() { char *area = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_NORESERVE | MAP_PRIVATE | MAP_ANON, -1, 0); if (area == MAP_FAILED) { perror("mmap() failed\n"); return -1; } /* Populate a single page such that we get an anon_vma. */ *area = 0; /* Enable KSM. */ madvise(area, size, MADV_MERGEABLE); pause(); return 0; } $ ./ksm-sparse & $ echo 1 > /sys/kernel/mm/ksm/run Without this patch ksmd uses 100% of the cpu for a long time (more then 1 hour in my test machine) scanning all the 32 TiB virtual address space that contain only one mapped page. This makes ksmd essentially deadlocked not able to deduplicate anything of value. With this patch ksmd walks only the one mapped page and skips the rest of the 32 TiB virtual address space, making the scan fast using little cpu. Link: https://lkml.kernel.org/r/20251023035841.41406-1-pedrodemargomes@gmail.com Link: https://lkml.kernel.org/r/20251022153059.22763-1-pedrodemargomes@gmail.com Link: https://lore.kernel.org/linux-mm/423de7a3-1c62-4e72-8e79-19a6413e420c@redhat.com/ [1] Fixes: 31dbd01f3143 ("ksm: Kernel SamePage Merging") Signed-off-by: Pedro Demarchi Gomes Co-developed-by: David Hildenbrand Signed-off-by: David Hildenbrand Reported-by: craftfever Closes: https://lkml.kernel.org/r/020cf8de6e773bb78ba7614ef250129f11a63781@murena.io Suggested-by: David Hildenbrand Acked-by: David Hildenbrand Cc: Chengming Zhou Cc: xu xin Cc: Signed-off-by: Andrew Morton --- mm/ksm.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 104 insertions(+), 9 deletions(-) diff --git a/mm/ksm.c b/mm/ksm.c index 7bc726b50b2f..c4e730409949 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2455,6 +2455,95 @@ static bool should_skip_rmap_item(struct folio *folio, return true; } +struct ksm_next_page_arg { + struct folio *folio; + struct page *page; + unsigned long addr; +}; + +static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct ksm_next_page_arg *private = walk->private; + struct vm_area_struct *vma = walk->vma; + pte_t *start_ptep = NULL, *ptep, pte; + struct mm_struct *mm = walk->mm; + struct folio *folio; + struct page *page; + spinlock_t *ptl; + pmd_t pmd; + + if (ksm_test_exit(mm)) + return 0; + + cond_resched(); + + pmd = pmdp_get_lockless(pmdp); + if (!pmd_present(pmd)) + return 0; + + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) { + ptl = pmd_lock(mm, pmdp); + pmd = pmdp_get(pmdp); + + if (!pmd_present(pmd)) { + goto not_found_unlock; + } else if (pmd_leaf(pmd)) { + page = vm_normal_page_pmd(vma, addr, pmd); + if (!page) + goto not_found_unlock; + folio = page_folio(page); + + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) + goto not_found_unlock; + + page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT); + goto found_unlock; + } + spin_unlock(ptl); + } + + start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); + if (!start_ptep) + return 0; + + for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) { + pte = ptep_get(ptep); + + if (!pte_present(pte)) + continue; + + page = vm_normal_page(vma, addr, pte); + if (!page) + continue; + folio = page_folio(page); + + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) + continue; + goto found_unlock; + } + +not_found_unlock: + spin_unlock(ptl); + if (start_ptep) + pte_unmap(start_ptep); + return 0; +found_unlock: + folio_get(folio); + spin_unlock(ptl); + if (start_ptep) + pte_unmap(start_ptep); + private->page = page; + private->folio = folio; + private->addr = addr; + return 1; +} + +static struct mm_walk_ops ksm_next_page_ops = { + .pmd_entry = ksm_next_page_pmd_entry, + .walk_lock = PGWALK_RDLOCK, +}; + static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) { struct mm_struct *mm; @@ -2542,21 +2631,27 @@ static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) ksm_scan.address = vma->vm_end; while (ksm_scan.address < vma->vm_end) { + struct ksm_next_page_arg ksm_next_page_arg; struct page *tmp_page = NULL; - struct folio_walk fw; struct folio *folio; if (ksm_test_exit(mm)) break; - folio = folio_walk_start(&fw, vma, ksm_scan.address, 0); - if (folio) { - if (!folio_is_zone_device(folio) && - folio_test_anon(folio)) { - folio_get(folio); - tmp_page = fw.page; - } - folio_walk_end(&fw, vma); + int found; + + found = walk_page_range_vma(vma, ksm_scan.address, + vma->vm_end, + &ksm_next_page_ops, + &ksm_next_page_arg); + + if (found > 0) { + folio = ksm_next_page_arg.folio; + tmp_page = ksm_next_page_arg.page; + ksm_scan.address = ksm_next_page_arg.addr; + } else { + VM_WARN_ON_ONCE(found < 0); + ksm_scan.address = vma->vm_end - PAGE_SIZE; } if (tmp_page) { From fa5a061700364bc28ee1cb1095372f8033645dcb Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 22 Oct 2025 23:05:21 -0400 Subject: [PATCH 0883/1075] mm/huge_memory: preserve PG_has_hwpoisoned if a folio is split to >0 order folio split clears PG_has_hwpoisoned, but the flag should be preserved in after-split folios containing pages with PG_hwpoisoned flag if the folio is split to >0 order folios. Scan all pages in a to-be-split folio to determine which after-split folios need the flag. An alternatives is to change PG_has_hwpoisoned to PG_maybe_hwpoisoned to avoid the scan and set it on all after-split folios, but resulting false positive has undesirable negative impact. To remove false positive, caller of folio_test_has_hwpoisoned() and folio_contain_hwpoisoned_page() needs to do the scan. That might be causing a hassle for current and future callers and more costly than doing the scan in the split code. More details are discussed in [1]. This issue can be exposed via: 1. splitting a has_hwpoisoned folio to >0 order from debugfs interface; 2. truncating part of a has_hwpoisoned folio in truncate_inode_partial_folio(). And later accesses to a hwpoisoned page could be possible due to the missing has_hwpoisoned folio flag. This will lead to MCE errors. Link: https://lore.kernel.org/all/CAHbLzkoOZm0PXxE9qwtF4gKR=cpRXrSrJ9V9Pm2DJexs985q4g@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20251023030521.473097-1-ziy@nvidia.com Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages") Signed-off-by: Zi Yan Acked-by: David Hildenbrand Reviewed-by: Yang Shi Reviewed-by: Lorenzo Stoakes Reviewed-by: Lance Yang Reviewed-by: Miaohe Lin Reviewed-by: Baolin Wang Reviewed-by: Wei Yang Cc: Pankaj Raghav Cc: Barry Song Cc: Dev Jain Cc: Jane Chu Cc: Liam Howlett Cc: Luis Chamberalin Cc: Matthew Wilcox (Oracle) Cc: Naoya Horiguchi Cc: Nico Pache Cc: Ryan Roberts Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index feac4aef7dfb..b4ff49d96501 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3263,6 +3263,14 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) caller_pins; } +static bool page_range_has_hwpoisoned(struct page *page, long nr_pages) +{ + for (; nr_pages; page++, nr_pages--) + if (PageHWPoison(page)) + return true; + return false; +} + /* * It splits @folio into @new_order folios and copies the @folio metadata to * all the resulting folios. @@ -3270,17 +3278,24 @@ bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins) static void __split_folio_to_order(struct folio *folio, int old_order, int new_order) { + /* Scan poisoned pages when split a poisoned folio to large folios */ + const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order; long new_nr_pages = 1 << new_order; long nr_pages = 1 << old_order; long i; + folio_clear_has_hwpoisoned(folio); + + /* Check first new_nr_pages since the loop below skips them */ + if (handle_hwpoison && + page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages)) + folio_set_has_hwpoisoned(folio); /* * Skip the first new_nr_pages, since the new folio from them have all * the flags from the original folio. */ for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) { struct page *new_head = &folio->page + i; - /* * Careful: new_folio is not a "real" folio before we cleared PageTail. * Don't pass it around before clear_compound_head(). @@ -3322,6 +3337,10 @@ static void __split_folio_to_order(struct folio *folio, int old_order, (1L << PG_dirty) | LRU_GEN_MASK | LRU_REFS_MASK)); + if (handle_hwpoison && + page_range_has_hwpoisoned(new_head, new_nr_pages)) + folio_set_has_hwpoisoned(new_folio); + new_folio->mapping = folio->mapping; new_folio->index = folio->index + i; @@ -3422,8 +3441,6 @@ static int __split_unmapped_folio(struct folio *folio, int new_order, if (folio_test_anon(folio)) mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); - folio_clear_has_hwpoisoned(folio); - /* * split to new_order one order at a time. For uniform split, * folio is split to new_order directly. From 895b4c0c79b092d732544011c3cecaf7322c36a1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 25 Oct 2025 10:42:33 +0800 Subject: [PATCH 0884/1075] fs/proc: fix uaf in proc_readdir_de() Pde is erased from subdir rbtree through rb_erase(), but not set the node to EMPTY, which may result in uaf access. We should use RB_CLEAR_NODE() set the erased node to EMPTY, then pde_subdir_next() will return NULL to avoid uaf access. We found an uaf issue while using stress-ng testing, need to run testcase getdent and tun in the same time. The steps of the issue is as follows: 1) use getdent to traverse dir /proc/pid/net/dev_snmp6/, and current pde is tun3; 2) in the [time windows] unregister netdevice tun3 and tun2, and erase them from rbtree. erase tun3 first, and then erase tun2. the pde(tun2) will be released to slab; 3) continue to getdent process, then pde_subdir_next() will return pde(tun2) which is released, it will case uaf access. CPU 0 | CPU 1 ------------------------------------------------------------------------- traverse dir /proc/pid/net/dev_snmp6/ | unregister_netdevice(tun->dev) //tun3 tun2 sys_getdents64() | iterate_dir() | proc_readdir() | proc_readdir_de() | snmp6_unregister_dev() pde_get(de); | proc_remove() read_unlock(&proc_subdir_lock); | remove_proc_subtree() | write_lock(&proc_subdir_lock); [time window] | rb_erase(&root->subdir_node, &parent->subdir); | write_unlock(&proc_subdir_lock); read_lock(&proc_subdir_lock); | next = pde_subdir_next(de); | pde_put(de); | de = next; //UAF | rbtree of dev_snmp6 | pde(tun3) / \ NULL pde(tun2) Link: https://lkml.kernel.org/r/20251025024233.158363-1-albin_yang@163.com Signed-off-by: Wei Yang Cc: Al Viro Cc: Christian Brauner Cc: wangzijie Cc: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton --- fs/proc/generic.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 176281112273..501889856461 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -698,6 +698,12 @@ void pde_put(struct proc_dir_entry *pde) } } +static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent) +{ + rb_erase(&pde->subdir_node, &parent->subdir); + RB_CLEAR_NODE(&pde->subdir_node); +} + /* * Remove a /proc entry and free it if it's not currently in use. */ @@ -720,7 +726,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) WARN(1, "removing permanent /proc entry '%s'", de->name); de = NULL; } else { - rb_erase(&de->subdir_node, &parent->subdir); + pde_erase(de, parent); if (S_ISDIR(de->mode)) parent->nlink--; } @@ -764,7 +770,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) root->parent->name, root->name); return -EINVAL; } - rb_erase(&root->subdir_node, &parent->subdir); + pde_erase(root, parent); de = root; while (1) { @@ -776,7 +782,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) next->parent->name, next->name); return -EINVAL; } - rb_erase(&next->subdir_node, &de->subdir); + pde_erase(next, de); de = next; continue; } From 74207de2ba10c2973334906822dc94d2e859ffc5 Mon Sep 17 00:00:00 2001 From: Kiryl Shutsemau Date: Mon, 27 Oct 2025 11:56:35 +0000 Subject: [PATCH 0885/1075] mm/memory: do not populate page table entries beyond i_size Patch series "Fix SIGBUS semantics with large folios", v3. Accessing memory within a VMA, but beyond i_size rounded up to the next page size, is supposed to generate SIGBUS. Darrick reported[1] an xfstests regression in v6.18-rc1. generic/749 failed due to missing SIGBUS. This was caused by my recent changes that try to fault in the whole folio where possible: 19773df031bc ("mm/fault: try to map the entire file folio in finish_fault()") 357b92761d94 ("mm/filemap: map entire large folio faultaround") These changes did not consider i_size when setting up PTEs, leading to xfstest breakage. However, the problem has been present in the kernel for a long time - since huge tmpfs was introduced in 2016. The kernel happily maps PMD-sized folios as PMD without checking i_size. And huge=always tmpfs allocates PMD-size folios on any writes. I considered this corner case when I implemented a large tmpfs, and my conclusion was that no one in their right mind should rely on receiving a SIGBUS signal when accessing beyond i_size. I cannot imagine how it could be useful for the workload. But apparently filesystem folks care a lot about preserving strict SIGBUS semantics. Generic/749 was introduced last year with reference to POSIX, but no real workloads were mentioned. It also acknowledged the tmpfs deviation from the test case. POSIX indeed says[3]: References within the address range starting at pa and continuing for len bytes to whole pages following the end of an object shall result in delivery of a SIGBUS signal. The patchset fixes the regression introduced by recent changes as well as more subtle SIGBUS breakage due to split failure on truncation. This patch (of 2): Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are supposed to generate SIGBUS. Recent changes attempted to fault in full folio where possible. They did not respect i_size, which led to populating PTEs beyond i_size and breaking SIGBUS semantics. Darrick reported generic/749 breakage because of this. However, the problem existed before the recent changes. With huge=always tmpfs, any write to a file leads to PMD-size allocation. Following the fault-in of the folio will install PMD mapping regardless of i_size. Fix filemap_map_pages() and finish_fault() to not install: - PTEs beyond i_size; - PMD mappings across i_size; Make an exception for shmem/tmpfs that for long time intentionally mapped with PMDs across i_size. Link: https://lkml.kernel.org/r/20251027115636.82382-1-kirill@shutemov.name Link: https://lkml.kernel.org/r/20251027115636.82382-2-kirill@shutemov.name Signed-off-by: Kiryl Shutsemau Fixes: 6795801366da ("xfs: Support large folios") Reported-by: "Darrick J. Wong" Cc: Al Viro Cc: Baolin Wang Cc: Christian Brauner Cc: Dave Chinner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/filemap.c | 28 ++++++++++++++++++++-------- mm/memory.c | 20 +++++++++++++++++++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 13f0259d993c..2f1e7e283a51 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3681,7 +3681,8 @@ static struct folio *next_uptodate_folio(struct xa_state *xas, static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, struct folio *folio, unsigned long start, unsigned long addr, unsigned int nr_pages, - unsigned long *rss, unsigned short *mmap_miss) + unsigned long *rss, unsigned short *mmap_miss, + bool can_map_large) { unsigned int ref_from_caller = 1; vm_fault_t ret = 0; @@ -3696,7 +3697,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, * The folio must not cross VMA or page table boundary. */ addr0 = addr - start * PAGE_SIZE; - if (folio_within_vma(folio, vmf->vma) && + if (can_map_large && folio_within_vma(folio, vmf->vma) && (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) { vmf->pte -= start; page -= start; @@ -3811,13 +3812,27 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned long rss = 0; unsigned int nr_pages = 0, folio_type; unsigned short mmap_miss = 0, mmap_miss_saved; + bool can_map_large; rcu_read_lock(); folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; - if (filemap_map_pmd(vmf, folio, start_pgoff)) { + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; + end_pgoff = min(end_pgoff, file_end); + + /* + * Do not allow to map with PTEs beyond i_size and with PMD + * across i_size to preserve SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + can_map_large = shmem_mapping(mapping) || + file_end >= folio_next_index(folio); + + if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) { ret = VM_FAULT_NOPAGE; goto out; } @@ -3830,10 +3845,6 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, goto out; } - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; - if (end_pgoff > file_end) - end_pgoff = file_end; - folio_type = mm_counter_file(folio); do { unsigned long end; @@ -3850,7 +3861,8 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, else ret |= filemap_map_folio_range(vmf, folio, xas.xa_index - folio->index, addr, - nr_pages, &rss, &mmap_miss); + nr_pages, &rss, &mmap_miss, + can_map_large); folio_unlock(folio); } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL); diff --git a/mm/memory.c b/mm/memory.c index 74b45e258323..b59ae7ce42eb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -5501,8 +5502,25 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } + if (!needs_fallback && vma->vm_file) { + struct address_space *mapping = vma->vm_file->f_mapping; + pgoff_t file_end; + + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); + + /* + * Do not allow to map with PTEs beyond i_size and with PMD + * across i_size to preserve SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + needs_fallback = !shmem_mapping(mapping) && + file_end < folio_next_index(folio); + } + if (pmd_none(*vmf->pmd)) { - if (folio_test_pmd_mappable(folio)) { + if (!needs_fallback && folio_test_pmd_mappable(folio)) { ret = do_set_pmd(vmf, folio, page); if (ret != VM_FAULT_FALLBACK) return ret; From fa04f5b60fda62c98a53a60de3a1e763f11feb41 Mon Sep 17 00:00:00 2001 From: Kiryl Shutsemau Date: Mon, 27 Oct 2025 11:56:36 +0000 Subject: [PATCH 0886/1075] mm/truncate: unmap large folio on split failure Accesses within VMA, but beyond i_size rounded up to PAGE_SIZE are supposed to generate SIGBUS. This behavior might not be respected on truncation. During truncation, the kernel splits a large folio in order to reclaim memory. As a side effect, it unmaps the folio and destroys PMD mappings of the folio. The folio will be refaulted as PTEs and SIGBUS semantics are preserved. However, if the split fails, PMD mappings are preserved and the user will not receive SIGBUS on any accesses within the PMD. Unmap the folio on split failure. It will lead to refault as PTEs and preserve SIGBUS semantics. Make an exception for shmem/tmpfs that for long time intentionally mapped with PMDs across i_size. Link: https://lkml.kernel.org/r/20251027115636.82382-3-kirill@shutemov.name Fixes: b9a8a4195c7d ("truncate,shmem: Handle truncates that split large folios") Signed-off-by: Kiryl Shutsemau Cc: Al Viro Cc: Baolin Wang Cc: Christian Brauner Cc: "Darrick J. Wong" Cc: Dave Chinner Cc: David Hildenbrand Cc: Hugh Dickins Cc: Johannes Weiner Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Mike Rapoport Cc: Rik van Riel Cc: Shakeel Butt Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/truncate.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/mm/truncate.c b/mm/truncate.c index 9210cf808f5c..3c5a50ae3274 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -177,6 +177,32 @@ int truncate_inode_folio(struct address_space *mapping, struct folio *folio) return 0; } +static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, + unsigned long min_order) +{ + enum ttu_flags ttu_flags = + TTU_SYNC | + TTU_SPLIT_HUGE_PMD | + TTU_IGNORE_MLOCK; + int ret; + + ret = try_folio_split_to_order(folio, split_at, min_order); + + /* + * If the split fails, unmap the folio, so it will be refaulted + * with PTEs to respect SIGBUS semantics. + * + * Make an exception for shmem/tmpfs that for long time + * intentionally mapped with PMDs across i_size. + */ + if (ret && !shmem_mapping(folio->mapping)) { + try_to_unmap(folio, ttu_flags); + WARN_ON(folio_mapped(folio)); + } + + return ret; +} + /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the @@ -226,7 +252,7 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) min_order = mapping_min_folio_order(folio->mapping); split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); - if (!try_folio_split_to_order(folio, split_at, min_order)) { + if (!try_folio_split_or_unmap(folio, split_at, min_order)) { /* * try to split at offset + length to make sure folios within * the range can be dropped, especially to avoid memory waste @@ -250,13 +276,10 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) if (!folio_trylock(folio2)) goto out; - /* - * make sure folio2 is large and does not change its mapping. - * Its split result does not matter here. - */ + /* make sure folio2 is large and does not change its mapping */ if (folio_test_large(folio2) && folio2->mapping == folio->mapping) - try_folio_split_to_order(folio2, split_at2, min_order); + try_folio_split_or_unmap(folio2, split_at2, min_order); folio_unlock(folio2); out: From 0d6c356dd6547adac2b06b461528e3573f52d953 Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 28 Oct 2025 12:10:12 -0700 Subject: [PATCH 0887/1075] mm/mm_init: fix hash table order logging in alloc_large_system_hash() When emitting the order of the allocation for a hash table, alloc_large_system_hash() unconditionally subtracts PAGE_SHIFT from log base 2 of the allocation size. This is not correct if the allocation size is smaller than a page, and yields a negative value for the order as seen below: TCP established hash table entries: 32 (order: -4, 256 bytes, linear) TCP bind hash table entries: 32 (order: -2, 1024 bytes, linear) Use get_order() to compute the order when emitting the hash table information to correctly handle cases where the allocation size is smaller than a page: TCP established hash table entries: 32 (order: 0, 256 bytes, linear) TCP bind hash table entries: 32 (order: 0, 1024 bytes, linear) Link: https://lkml.kernel.org/r/20251028191020.413002-1-isaacmanjarres@google.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Isaac J. Manjarres Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: David Hildenbrand Cc: Signed-off-by: Andrew Morton --- mm/mm_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mm_init.c b/mm/mm_init.c index 3db2dea7db4c..7712d887b696 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2469,7 +2469,7 @@ void *__init alloc_large_system_hash(const char *tablename, panic("Failed to allocate %s hash table\n", tablename); pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, + tablename, 1UL << log2qty, get_order(size), size, virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear"); if (_hash_shift) From ec4d11fc4b2dd4a2fa8c9d801ee9753b74623554 Mon Sep 17 00:00:00 2001 From: Peter Oberparleiter Date: Tue, 28 Oct 2025 12:51:25 +0100 Subject: [PATCH 0888/1075] gcov: add support for GCC 15 Using gcov on kernels compiled with GCC 15 results in truncated 16-byte long .gcda files with no usable data. To fix this, update GCOV_COUNTERS to match the value defined by GCC 15. Tested with GCC 14.3.0 and GCC 15.2.0. Link: https://lkml.kernel.org/r/20251028115125.1319410-1-oberpar@linux.ibm.com Signed-off-by: Peter Oberparleiter Reported-by: Matthieu Baerts Closes: https://github.com/linux-test-project/lcov/issues/445 Tested-by: Matthieu Baerts Cc: Signed-off-by: Andrew Morton --- kernel/gcov/gcc_4_7.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/gcov/gcc_4_7.c b/kernel/gcov/gcc_4_7.c index a08cc076f332..ffde93d051a4 100644 --- a/kernel/gcov/gcc_4_7.c +++ b/kernel/gcov/gcc_4_7.c @@ -18,7 +18,9 @@ #include #include "gcov.h" -#if (__GNUC__ >= 14) +#if (__GNUC__ >= 15) +#define GCOV_COUNTERS 10 +#elif (__GNUC__ >= 14) #define GCOV_COUNTERS 9 #elif (__GNUC__ >= 10) #define GCOV_COUNTERS 8 From 04d1c9d60c6ec4c0003d433572eaa45f8b217788 Mon Sep 17 00:00:00 2001 From: Dev Jain Date: Tue, 28 Oct 2025 12:09:52 +0530 Subject: [PATCH 0889/1075] mm/mremap: honour writable bit in mremap pte batching Currently mremap folio pte batch ignores the writable bit during figuring out a set of similar ptes mapping the same folio. Suppose that the first pte of the batch is writable while the others are not - set_ptes will end up setting the writable bit on the other ptes, which is a violation of mremap semantics. Therefore, use FPB_RESPECT_WRITE to check the writable bit while determining the pte batch. Link: https://lkml.kernel.org/r/20251028063952.90313-1-dev.jain@arm.com Signed-off-by: Dev Jain Fixes: f822a9a81a31 ("mm: optimize mremap() by PTE batching") Reported-by: David Hildenbrand Debugged-by: David Hildenbrand Acked-by: David Hildenbrand Acked-by: Pedro Falcato Reviewed-by: Lorenzo Stoakes Cc: Barry Song Cc: Jann Horn Cc: Liam Howlett Cc: Vlastimil Babka Cc: [6.17+] Signed-off-by: Andrew Morton --- mm/mremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mremap.c b/mm/mremap.c index bd7314898ec5..419a0ea0a870 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -187,7 +187,7 @@ static int mremap_folio_pte_batch(struct vm_area_struct *vma, unsigned long addr if (!folio || !folio_test_large(folio)) return 1; - return folio_pte_batch(folio, ptep, pte, max_nr); + return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE); } static int move_ptes(struct pagetable_move_control *pmc, From 1abbdf3d57aa964e572940d67c9ec5dc87710738 Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Wed, 29 Oct 2025 09:43:17 +0800 Subject: [PATCH 0890/1075] codetag: debug: handle existing CODETAG_EMPTY in mark_objexts_empty for slabobj_ext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When alloc_slab_obj_exts() fails and then later succeeds in allocating a slab extension vector, it calls handle_failed_objexts_alloc() to mark all objects in the vector as empty. As a result all objects in this slab (slabA) will have their extensions set to CODETAG_EMPTY. Later on if this slabA is used to allocate a slabobj_ext vector for another slab (slabB), we end up with the slabB->obj_exts pointing to a slabobj_ext vector that itself has a non-NULL slabobj_ext equal to CODETAG_EMPTY. When slabB gets freed, free_slab_obj_exts() is called to free slabB->obj_exts vector. free_slab_obj_exts() calls mark_objexts_empty(slabB->obj_exts) which will generate a warning because it expects slabobj_ext vectors to have a NULL obj_ext, not CODETAG_EMPTY. Modify mark_objexts_empty() to skip the warning and setting the obj_ext value if it's already set to CODETAG_EMPTY. To quickly detect this WARN, I modified the code from WARN_ON(slab_exts[offs].ref.ct) to BUG_ON(slab_exts[offs].ref.ct == 1); We then obtained this message: [21630.898561] ------------[ cut here ]------------ [21630.898596] kernel BUG at mm/slub.c:2050! [21630.898611] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [21630.900372] Modules linked in: squashfs isofs vfio_iommu_type1 vhost_vsock vfio vhost_net vmw_vsock_virtio_transport_common vhost tap vhost_iotlb iommufd vsock binfmt_misc nfsv3 nfs_acl nfs lockd grace netfs tls rds dns_resolver tun brd overlay ntfs3 exfat btrfs blake2b_generic xor xor_neon raid6_pq loop sctp ip6_udp_tunnel udp_tunnel nft_fib_inet nft_fib_ipv4 nft_fib_ipv6 nft_fib nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables rfkill ip_set sunrpc vfat fat joydev sg sch_fq_codel nfnetlink virtio_gpu sr_mod cdrom drm_client_lib virtio_dma_buf drm_shmem_helper drm_kms_helper drm ghash_ce backlight virtio_net virtio_blk virtio_scsi net_failover virtio_console failover virtio_mmio dm_mirror dm_region_hash dm_log dm_multipath dm_mod fuse i2c_dev virtio_pci virtio_pci_legacy_dev virtio_pci_modern_dev virtio virtio_ring autofs4 aes_neon_bs aes_ce_blk [last unloaded: hwpoison_inject] [21630.909177] CPU: 3 UID: 0 PID: 3787 Comm: kylin-process-m Kdump: loaded Tainted: G        W           6.18.0-rc1+ #74 PREEMPT(voluntary) [21630.910495] Tainted: [W]=WARN [21630.910867] Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022 [21630.911625] pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [21630.912392] pc : __free_slab+0x228/0x250 [21630.912868] lr : __free_slab+0x18c/0x250[21630.913334] sp : ffff8000a02f73e0 [21630.913830] x29: ffff8000a02f73e0 x28: fffffdffc43fc800 x27: ffff0000c0011c40 [21630.914677] x26: ffff0000c000cac0 x25: ffff00010fe5e5f0 x24: ffff000102199b40 [21630.915469] x23: 0000000000000003 x22: 0000000000000003 x21: ffff0000c0011c40 [21630.916259] x20: fffffdffc4086600 x19: fffffdffc43fc800 x18: 0000000000000000 [21630.917048] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 [21630.917837] x14: 0000000000000000 x13: 0000000000000000 x12: ffff70001405ee66 [21630.918640] x11: 1ffff0001405ee65 x10: ffff70001405ee65 x9 : ffff800080a295dc [21630.919442] x8 : ffff8000a02f7330 x7 : 0000000000000000 x6 : 0000000000003000 [21630.920232] x5 : 0000000024924925 x4 : 0000000000000001 x3 : 0000000000000007 [21630.921021] x2 : 0000000000001b40 x1 : 000000000000001f x0 : 0000000000000001 [21630.921810] Call trace: [21630.922130]  __free_slab+0x228/0x250 (P) [21630.922669]  free_slab+0x38/0x118 [21630.923079]  free_to_partial_list+0x1d4/0x340 [21630.923591]  __slab_free+0x24c/0x348 [21630.924024]  ___cache_free+0xf0/0x110 [21630.924468]  qlist_free_all+0x78/0x130 [21630.924922]  kasan_quarantine_reduce+0x114/0x148 [21630.925525]  __kasan_slab_alloc+0x7c/0xb0 [21630.926006]  kmem_cache_alloc_noprof+0x164/0x5c8 [21630.926699]  __alloc_object+0x44/0x1f8 [21630.927153]  __create_object+0x34/0xc8 [21630.927604]  kmemleak_alloc+0xb8/0xd8 [21630.928052]  kmem_cache_alloc_noprof+0x368/0x5c8 [21630.928606]  getname_flags.part.0+0xa4/0x610 [21630.929112]  getname_flags+0x80/0xd8 [21630.929557]  vfs_fstatat+0xc8/0xe0 [21630.929975]  __do_sys_newfstatat+0xa0/0x100 [21630.930469]  __arm64_sys_newfstatat+0x90/0xd8 [21630.931046]  invoke_syscall+0xd4/0x258 [21630.931685]  el0_svc_common.constprop.0+0xb4/0x240 [21630.932467]  do_el0_svc+0x48/0x68 [21630.932972]  el0_svc+0x40/0xe0 [21630.933472]  el0t_64_sync_handler+0xa0/0xe8 [21630.934151]  el0t_64_sync+0x1ac/0x1b0 [21630.934923] Code: aa1803e0 97ffef2b a9446bf9 17ffff9c (d4210000) [21630.936461] SMP: stopping secondary CPUs [21630.939550] Starting crashdump kernel... [21630.940108] Bye! Link: https://lkml.kernel.org/r/20251029014317.1533488-1-hao.ge@linux.dev Fixes: 09c46563ff6d ("codetag: debug: introduce OBJEXTS_ALLOC_FAIL to mark failed slab_ext allocations") Signed-off-by: Hao Ge Reviewed-by: Suren Baghdasaryan Cc: Christoph Lameter (Ampere) Cc: David Rientjes Cc: gehao Cc: Roman Gushchin Cc: Shakeel Butt Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- mm/slub.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index f1a5373eee7b..1bf65c421325 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2046,7 +2046,11 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) if (slab_exts) { unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, obj_exts_slab, obj_exts); - /* codetag should be NULL */ + + if (unlikely(is_codetag_empty(&slab_exts[offs].ref))) + return; + + /* codetag should be NULL here */ WARN_ON(slab_exts[offs].ref.ct); set_codetag_empty(&slab_exts[offs].ref); } From 91a54090026f84ceffaa12ac53c99b9f162946f6 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 30 Oct 2025 16:55:05 +0100 Subject: [PATCH 0891/1075] maple_tree: fix tracepoint string pointers maple_tree tracepoints contain pointers to function names. Such a pointer is saved when a tracepoint logs an event. There's no guarantee that it's still valid when the event is parsed later and the pointer is dereferenced. The kernel warns about these unsafe pointers. event 'ma_read' has unsafe pointer field 'fn' WARNING: kernel/trace/trace.c:3779 at ignore_event+0x1da/0x1e4 Mark the function names as tracepoint_string() to fix the events. One case that doesn't work without my patch would be trace-cmd record to save the binary ringbuffer and trace-cmd report to parse it in userspace. The address of __func__ can't be dereferenced from userspace but tracepoint_string will add an entry to /sys/kernel/tracing/printk_formats Link: https://lkml.kernel.org/r/20251030155537.87972-1-martin@kaiser.cx Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Martin Kaiser Acked-by: Liam R. Howlett Cc: Signed-off-by: Andrew Morton --- lib/maple_tree.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 39bb779cb311..5aa4c9500018 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -64,6 +64,8 @@ #define CREATE_TRACE_POINTS #include +#define TP_FCT tracepoint_string(__func__) + /* * Kernel pointer hashing renders much of the maple tree dump useless as tagged * pointers get hashed to arbitrary values. @@ -2756,7 +2758,7 @@ static inline void mas_rebalance(struct ma_state *mas, MA_STATE(l_mas, mas->tree, mas->index, mas->last); MA_STATE(r_mas, mas->tree, mas->index, mas->last); - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); /* * Rebalancing occurs if a node is insufficient. Data is rebalanced @@ -2997,7 +2999,7 @@ static void mas_split(struct ma_state *mas, struct maple_big_node *b_node) MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); mast.l = &l_mas; mast.r = &r_mas; @@ -3172,7 +3174,7 @@ static bool mas_is_span_wr(struct ma_wr_state *wr_mas) return false; } - trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); + trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry); return true; } @@ -3416,7 +3418,7 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) * of data may happen. */ mas = wr_mas->mas; - trace_ma_op(__func__, mas); + trace_ma_op(TP_FCT, mas); if (unlikely(!mas->index && mas->last == ULONG_MAX)) return mas_new_root(mas, wr_mas->entry); @@ -3552,7 +3554,7 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, } else { memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); } - trace_ma_write(__func__, mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); mas_update_gap(mas); mas->end = new_end; return; @@ -3596,7 +3598,7 @@ static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas) mas->offset++; /* Keep mas accurate. */ } - trace_ma_write(__func__, mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); /* * Only update gap when the new entry is empty or there is an empty * entry in the original two ranges. @@ -3717,7 +3719,7 @@ static inline void mas_wr_append(struct ma_wr_state *wr_mas, mas_update_gap(mas); mas->end = new_end; - trace_ma_write(__func__, mas, new_end, wr_mas->entry); + trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry); return; } @@ -3731,7 +3733,7 @@ static void mas_wr_bnode(struct ma_wr_state *wr_mas) { struct maple_big_node b_node; - trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); + trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry); memset(&b_node, 0, sizeof(struct maple_big_node)); mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); mas_commit_b_node(wr_mas, &b_node); @@ -5062,7 +5064,7 @@ void *mas_store(struct ma_state *mas, void *entry) { MA_WR_STATE(wr_mas, mas, entry); - trace_ma_write(__func__, mas, 0, entry); + trace_ma_write(TP_FCT, mas, 0, entry); #ifdef CONFIG_DEBUG_MAPLE_TREE if (MAS_WARN_ON(mas, mas->index > mas->last)) pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, @@ -5163,7 +5165,7 @@ void mas_store_prealloc(struct ma_state *mas, void *entry) } store: - trace_ma_write(__func__, mas, 0, entry); + trace_ma_write(TP_FCT, mas, 0, entry); mas_wr_store_entry(&wr_mas); MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); mas_destroy(mas); @@ -5882,7 +5884,7 @@ void *mtree_load(struct maple_tree *mt, unsigned long index) MA_STATE(mas, mt, index, index); void *entry; - trace_ma_read(__func__, &mas); + trace_ma_read(TP_FCT, &mas); rcu_read_lock(); retry: entry = mas_start(&mas); @@ -5925,7 +5927,7 @@ int mtree_store_range(struct maple_tree *mt, unsigned long index, MA_STATE(mas, mt, index, last); int ret = 0; - trace_ma_write(__func__, &mas, 0, entry); + trace_ma_write(TP_FCT, &mas, 0, entry); if (WARN_ON_ONCE(xa_is_advanced(entry))) return -EINVAL; @@ -6148,7 +6150,7 @@ void *mtree_erase(struct maple_tree *mt, unsigned long index) void *entry = NULL; MA_STATE(mas, mt, index, index); - trace_ma_op(__func__, &mas); + trace_ma_op(TP_FCT, &mas); mtree_lock(mt); entry = mas_erase(&mas); @@ -6485,7 +6487,7 @@ void *mt_find(struct maple_tree *mt, unsigned long *index, unsigned long max) unsigned long copy = *index; #endif - trace_ma_read(__func__, &mas); + trace_ma_read(TP_FCT, &mas); if ((*index) > max) return NULL; From 2f6ce7e714ef842e43120ecd6a7ed287b502026d Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Thu, 30 Oct 2025 10:07:45 +0800 Subject: [PATCH 0892/1075] mm/damon/stat: change last_refresh_jiffies to a global variable Patch series "mm/damon: fixes for the jiffies-related issues", v2. On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make jiffies wrap bugs appear earlier. However, this may cause the time_before() series of functions to return unexpected values, resulting in DAMON not functioning as intended. Meanwhile, similar issues exist in some specific user operation scenarios. This patchset addresses these issues. The first patch is about the DAMON_STAT module, and the second patch is about the core layer's sysfs. This patch (of 2): In DAMON_STAT's damon_stat_damon_call_fn(), time_before_eq() is used to avoid unnecessarily frequent stat update. On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make jiffies wrap bugs appear earlier. However, this causes time_before_eq() in DAMON_STAT to unexpectedly return true during the first 5 minutes after boot on 32-bit systems (see [1] for more explanation, which fixes another jiffies-related issue before). As a result, DAMON_STAT does not update any monitoring results during that period, which becomes more confusing when DAMON_STAT_ENABLED_DEFAULT is enabled. There is also an issue unrelated to the system's word size[2]: if the user stops DAMON_STAT just after last_refresh_jiffies is updated and restarts it after 5 seconds or a longer delay, last_refresh_jiffies will retain an older value, causing time_before_eq() to return false and the update to happen earlier than expected. Fix these issues by making last_refresh_jiffies a global variable and initializing it each time DAMON_STAT is started. Link: https://lkml.kernel.org/r/20251030020746.967174-2-yanquanmin1@huawei.com Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1] Link: https://lore.kernel.org/all/20251028143250.50144-1-sj@kernel.org/ [2] Fixes: fabdd1e911da ("mm/damon/stat: calculate and expose estimated memory bandwidth") Signed-off-by: Quanmin Yan Suggested-by: SeongJae Park Reviewed-by: SeongJae Park Cc: Kefeng Wang Cc: ze zuo Cc: Signed-off-by: Andrew Morton --- mm/damon/stat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/damon/stat.c b/mm/damon/stat.c index d8010968bbed..bf8626859902 100644 --- a/mm/damon/stat.c +++ b/mm/damon/stat.c @@ -46,6 +46,8 @@ MODULE_PARM_DESC(aggr_interval_us, static struct damon_ctx *damon_stat_context; +static unsigned long damon_stat_last_refresh_jiffies; + static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c) { struct damon_target *t; @@ -130,13 +132,12 @@ static void damon_stat_set_idletime_percentiles(struct damon_ctx *c) static int damon_stat_damon_call_fn(void *data) { struct damon_ctx *c = data; - static unsigned long last_refresh_jiffies; /* avoid unnecessarily frequent stat update */ - if (time_before_eq(jiffies, last_refresh_jiffies + + if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies + msecs_to_jiffies(5 * MSEC_PER_SEC))) return 0; - last_refresh_jiffies = jiffies; + damon_stat_last_refresh_jiffies = jiffies; aggr_interval_us = c->attrs.aggr_interval; damon_stat_set_estimated_memory_bandwidth(c); @@ -210,6 +211,8 @@ static int damon_stat_start(void) err = damon_start(&damon_stat_context, 1, true); if (err) return err; + + damon_stat_last_refresh_jiffies = jiffies; call_control.data = damon_stat_context; return damon_call(damon_stat_context, &call_control); } From 9fd7bb5083d1e1027b8ac1e365c29921ab88b177 Mon Sep 17 00:00:00 2001 From: Quanmin Yan Date: Thu, 30 Oct 2025 10:07:46 +0800 Subject: [PATCH 0893/1075] mm/damon/sysfs: change next_update_jiffies to a global variable In DAMON's damon_sysfs_repeat_call_fn(), time_before() is used to compare the current jiffies with next_update_jiffies to determine whether to update the sysfs files at this moment. On 32-bit systems, the kernel initializes jiffies to "-5 minutes" to make jiffies wrap bugs appear earlier. However, this causes time_before() in damon_sysfs_repeat_call_fn() to unexpectedly return true during the first 5 minutes after boot on 32-bit systems (see [1] for more explanation, which fixes another jiffies-related issue before). As a result, DAMON does not update sysfs files during that period. There is also an issue unrelated to the system's word size[2]: if the user stops DAMON just after next_update_jiffies is updated and restarts it after 'refresh_ms' or a longer delay, next_update_jiffies will retain an older value, causing time_before() to return false and the update to happen earlier than expected. Fix these issues by making next_update_jiffies a global variable and initializing it each time DAMON is started. Link: https://lkml.kernel.org/r/20251030020746.967174-3-yanquanmin1@huawei.com Link: https://lkml.kernel.org/r/20250822025057.1740854-1-ekffu200098@gmail.com [1] Link: https://lore.kernel.org/all/20251029013038.66625-1-sj@kernel.org/ [2] Fixes: d809a7c64ba8 ("mm/damon/sysfs: implement refresh_ms file internal work") Suggested-by: SeongJae Park Reviewed-by: SeongJae Park Signed-off-by: Quanmin Yan Cc: Kefeng Wang Cc: ze zuo Cc: Signed-off-by: Andrew Morton --- mm/damon/sysfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index cd6815ecc04e..3c0d727788c8 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -1552,16 +1552,17 @@ static struct damon_ctx *damon_sysfs_build_ctx( return ctx; } +static unsigned long damon_sysfs_next_update_jiffies; + static int damon_sysfs_repeat_call_fn(void *data) { struct damon_sysfs_kdamond *sysfs_kdamond = data; - static unsigned long next_update_jiffies; if (!sysfs_kdamond->refresh_ms) return 0; - if (time_before(jiffies, next_update_jiffies)) + if (time_before(jiffies, damon_sysfs_next_update_jiffies)) return 0; - next_update_jiffies = jiffies + + damon_sysfs_next_update_jiffies = jiffies + msecs_to_jiffies(sysfs_kdamond->refresh_ms); if (!mutex_trylock(&damon_sysfs_lock)) @@ -1607,6 +1608,9 @@ static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) } kdamond->damon_ctx = ctx; + damon_sysfs_next_update_jiffies = + jiffies + msecs_to_jiffies(kdamond->refresh_ms); + repeat_call_control->fn = damon_sysfs_repeat_call_fn; repeat_call_control->data = kdamond; repeat_call_control->repeat = true; From 7d9f7d390f6af3a29614e81e802e2b9c238eb7b2 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 30 Oct 2025 01:03:33 +0000 Subject: [PATCH 0894/1075] scripts/decode_stacktrace.sh: fix build ID and PC source parsing Support for parsing PC source info in stacktraces (e.g. '(P)') was added in commit 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info"). However, this logic was placed after the build ID processing. This incorrect order fails to parse lines containing both elements, e.g.: drm_gem_mmap_obj+0x114/0x200 [drm 03d0564e0529947d67bb2008c3548be77279fd27] (P) This patch fixes the problem by extracting the PC source info first and then processing the module build ID. With this change, the line above is now properly parsed as such: drm_gem_mmap_obj (./include/linux/mmap_lock.h:212 ./include/linux/mm.h:811 drivers/gpu/drm/drm_gem.c:1177) drm (P) While here, also add a brief explanation the build ID section. Link: https://lkml.kernel.org/r/20251030010347.2731925-1-cmllamas@google.com Fixes: 2bff77c665ed ("scripts/decode_stacktrace.sh: fix decoding of lines with an additional info") Signed-off-by: Carlos Llamas Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Luca Ceresoli Cc: Breno Leitao Cc: Catalin Marinas Cc: Marc Rutland Cc: Mark Brown Cc: Matthieu Baerts Cc: Miroslav Benes Cc: Puranjay Mohan Cc: Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index c73cb802a0a3..8d01b741de62 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -277,12 +277,6 @@ handle_line() { fi done - if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then - words[$last-1]="${words[$last-1]} ${words[$last]}" - unset words[$last] spaces[$last] - last=$(( $last - 1 )) - fi - # Extract info after the symbol if present. E.g.: # func_name+0x54/0x80 (P) # ^^^ @@ -295,6 +289,14 @@ handle_line() { last=$(( $last - 1 )) fi + # Join module name with its build id if present, as these were + # split during tokenization (e.g. "[module" and "modbuildid]"). + if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then + words[$last-1]="${words[$last-1]} ${words[$last]}" + unset words[$last] spaces[$last] + last=$(( $last - 1 )) + fi + if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then module=${words[$last]} # some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])" From 9a6b60cb147d53968753a34805211d2e5e08c027 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Thu, 30 Oct 2025 07:51:52 +0900 Subject: [PATCH 0895/1075] nilfs2: avoid having an active sc_timer before freeing sci Because kthread_stop did not stop sc_task properly and returned -EINTR, the sc_timer was not properly closed, ultimately causing the problem [1] reported by syzbot when freeing sci due to the sc_timer not being closed. Because the thread sc_task main function nilfs_segctor_thread() returns 0 when it succeeds, when the return value of kthread_stop() is not 0 in nilfs_segctor_destroy(), we believe that it has not properly closed sc_timer. We use timer_shutdown_sync() to sync wait for sc_timer to shutdown, and set the value of sc_task to NULL under the protection of lock sc_state_lock, so as to avoid the issue caused by sc_timer not being properly shutdowned. [1] ODEBUG: free active (active state 0) object: 00000000dacb411a object type: timer_list hint: nilfs_construction_timeout Call trace: nilfs_segctor_destroy fs/nilfs2/segment.c:2811 [inline] nilfs_detach_log_writer+0x668/0x8cc fs/nilfs2/segment.c:2877 nilfs_put_super+0x4c/0x12c fs/nilfs2/super.c:509 Link: https://lkml.kernel.org/r/20251029225226.16044-1-konishi.ryusuke@gmail.com Fixes: 3f66cc261ccb ("nilfs2: use kthread_create and kthread_stop for the log writer thread") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+24d8b70f039151f65590@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=24d8b70f039151f65590 Tested-by: syzbot+24d8b70f039151f65590@syzkaller.appspotmail.com Signed-off-by: Edward Adam Davis Cc: [6.12+] Signed-off-by: Andrew Morton --- fs/nilfs2/segment.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index f15ca6fc400d..deee16bc9d4e 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -2768,7 +2768,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) if (sci->sc_task) { wake_up(&sci->sc_wait_daemon); - kthread_stop(sci->sc_task); + if (kthread_stop(sci->sc_task)) { + spin_lock(&sci->sc_state_lock); + sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); + spin_unlock(&sci->sc_state_lock); + } } spin_lock(&sci->sc_state_lock); From adfb6609c6809e107ded9a1cd46f519c882e64ea Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 31 Oct 2025 16:57:50 +0000 Subject: [PATCH 0896/1075] mm/huge_memory: initialise the tags of the huge zero folio On arm64 with MTE enabled, a page mapped as Normal Tagged (PROT_MTE) in user space will need to have its allocation tags initialised. This is normally done in the arm64 set_pte_at() after checking the memory attributes. Such page is also marked with the PG_mte_tagged flag to avoid subsequent clearing. Since this relies on having a struct page, pte_special() mappings are ignored. Commit d82d09e48219 ("mm/huge_memory: mark PMD mappings of the huge zero folio special") maps the huge zero folio special and the arm64 set_pmd_at() will no longer zero the tags. There is no guarantee that the tags are zero, especially if parts of this huge page have been previously tagged. It's fairly easy to detect this by regularly dropping the caches to force the reallocation of the huge zero folio. Allocate the huge zero folio with the __GFP_ZEROTAGS flag. In addition, do not warn in the arm64 __access_remote_tags() when reading tags from the huge zero page. I bundled the arm64 change in here as well since they are both related to the commit mapping the huge zero folio as special. [catalin.marinas@arm.com: handle arch mte_zero_clear_page_tags() code issuing MTE instructions] Link: https://lkml.kernel.org/r/aQi8dA_QpXM8XqrE@arm.com Link: https://lkml.kernel.org/r/20251031170133.280742-1-catalin.marinas@arm.com Fixes: d82d09e48219 ("mm/huge_memory: mark PMD mappings of the huge zero folio special") Signed-off-by: Catalin Marinas Acked-by: David Hildenbrand Reviewed-by: Lance Yang Tested-by: Beleswar Padhi Cc: Will Deacon Cc: Mark Brown Cc: Aishwarya TCV Cc: David Hildenbrand (Red Hat) Signed-off-by: Andrew Morton --- arch/arm64/kernel/mte.c | 3 ++- arch/arm64/mm/fault.c | 10 ++++++++++ mm/huge_memory.c | 3 ++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 43f7a2f39403..32148bf09c1d 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -476,7 +476,8 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, folio = page_folio(page); if (folio_test_hugetlb(folio)) - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) && + !is_huge_zero_folio(folio)); else WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d816ff44faff..125dfa6c613b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -969,6 +969,16 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { + /* + * Check if MTE is supported and fall back to clear_highpage(). + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and + * post_alloc_hook() will invoke tag_clear_highpage(). + */ + if (!system_supports_mte()) { + clear_highpage(page); + return; + } + /* Newly allocated page, shouldn't have been tagged yet */ WARN_ON_ONCE(!try_page_mte_tagging(page)); mte_zero_clear_page_tags(page_address(page)); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b4ff49d96501..323654fb4f8c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -214,7 +214,8 @@ static bool get_huge_zero_folio(void) if (likely(atomic_inc_not_zero(&huge_zero_refcount))) return true; - zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, + zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO | __GFP_ZEROTAGS) & + ~__GFP_MOVABLE, HPAGE_PMD_ORDER); if (!zero_folio) { count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); From 6f86d0534fddfbd08687fa0f01479d4226bc3c3d Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Fri, 31 Oct 2025 20:09:55 +0800 Subject: [PATCH 0897/1075] mm/secretmem: fix use-after-free race in fault handler When a page fault occurs in a secret memory file created with `memfd_secret(2)`, the kernel will allocate a new folio for it, mark the underlying page as not-present in the direct map, and add it to the file mapping. If two tasks cause a fault in the same page concurrently, both could end up allocating a folio and removing the page from the direct map, but only one would succeed in adding the folio to the file mapping. The task that failed undoes the effects of its attempt by (a) freeing the folio again and (b) putting the page back into the direct map. However, by doing these two operations in this order, the page becomes available to the allocator again before it is placed back in the direct mapping. If another task attempts to allocate the page between (a) and (b), and the kernel tries to access it via the direct map, it would result in a supervisor not-present page fault. Fix the ordering to restore the direct map before the folio is freed. Link: https://lkml.kernel.org/r/20251031120955.92116-1-lance.yang@linux.dev Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas") Signed-off-by: Lance Yang Reported-by: Google Big Sleep Closes: https://lore.kernel.org/linux-mm/CAEXGt5QeDpiHTu3K9tvjUTPqo+d-=wuCNYPa+6sWKrdQJ-ATdg@mail.gmail.com/ Acked-by: David Hildenbrand Reviewed-by: Mike Rapoport (Microsoft) Reviewed-by: Lorenzo Stoakes Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/secretmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/secretmem.c b/mm/secretmem.c index 60137305bc20..b59350daffe3 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -82,13 +82,13 @@ static vm_fault_t secretmem_fault(struct vm_fault *vmf) __folio_mark_uptodate(folio); err = filemap_add_folio(mapping, folio, offset, gfp); if (unlikely(err)) { - folio_put(folio); /* * If a split of large page was required, it * already happened when we marked the page invalid * which guarantees that this call won't fail */ set_direct_map_default_noflush(folio_page(folio, 0)); + folio_put(folio); if (err == -EEXIST) goto retry; From bba717bbc466ab24d25964034f5e16ead1720512 Mon Sep 17 00:00:00 2001 From: Chris Li Date: Sun, 2 Nov 2025 07:11:07 -0800 Subject: [PATCH 0898/1075] MAINTAINERS: add Chris and Kairui as the swap maintainer We have been collaborating on a systematic effort to clean up and improve the Linux swap system, and might as well take responsibility for it. Link: https://lkml.kernel.org/r/20251102-swap-m-v1-1-582f275d5bce@kernel.org Signed-off-by: Chris Li Acked-by: Kairui Song Acked-by: Barry Song Acked-by: Baoquan He Cc: Johannes Weiner Cc: Kemeng Shi Cc: Michal Hocko Cc: Minchan Kim Cc: Nhat Pham Cc: Roman Gushchin Cc: SeongJae Park Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index ddecf1ef3bed..5b93346f464f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16498,12 +16498,12 @@ F: mm/secretmem.c MEMORY MANAGEMENT - SWAP M: Andrew Morton +M: Chris Li +M: Kairui Song R: Kemeng Shi -R: Kairui Song R: Nhat Pham R: Baoquan He R: Barry Song -R: Chris Li L: linux-mm@kvack.org S: Maintained F: Documentation/mm/swap-table.rst From 0b07092d09e54e49b85379a9c60f82d54a881514 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 3 Nov 2025 12:01:57 +0100 Subject: [PATCH 0899/1075] kho: fix out-of-bounds access of vmalloc chunk The list of pages in a vmalloc chunk is NULL-terminated. So when looping through the pages in a vmalloc chunk, both kho_restore_vmalloc() and kho_vmalloc_unpreserve_chunk() rightly make sure to stop when encountering a NULL page. But when the chunk is full, the loops do not stop and go past the bounds of chunk->phys, resulting in out-of-bounds memory access, and possibly the restoration or unpreservation of an invalid page. Fix this by making sure the processing of chunk stops at the end of the array. Link: https://lkml.kernel.org/r/20251103110159.8399-1-pratyush@kernel.org Fixes: a667300bd53f ("kho: add support for preserving vmalloc allocations") Signed-off-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Baoquan He Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 2a8c20c238a8..36fdce2667c5 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -889,7 +889,7 @@ static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) __kho_unpreserve(track, pfn, pfn + 1); - for (int i = 0; chunk->phys[i]; i++) { + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { pfn = PHYS_PFN(chunk->phys[i]); __kho_unpreserve(track, pfn, pfn + 1); } @@ -1012,7 +1012,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation) while (chunk) { struct page *page; - for (int i = 0; chunk->phys[i]; i++) { + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { phys_addr_t phys = chunk->phys[i]; if (idx + contig_pages > total_pages) From 7ecd2e439d1272ac02d798b0033a426e3b00dff5 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 3 Nov 2025 19:02:31 +0100 Subject: [PATCH 0900/1075] kho: fix unpreservation of higher-order vmalloc preservations kho_vmalloc_unpreserve_chunk() calls __kho_unpreserve() with end_pfn as pfn + 1. This happens to work for 0-order pages, but leaks higher order pages. For example, say order 2 pages back the allocation. During preservation, they get preserved in the order 2 bitmaps, but kho_vmalloc_unpreserve_chunk() would try to unpreserve them from the order 0 bitmaps, which should not have these bits set anyway, leaving the order 2 bitmaps untouched. This results in the pages being carried over to the next kernel. Nothing will free those pages in the next boot, leaking them. Fix this by taking the order into account when calculating the end PFN for __kho_unpreserve(). Link: https://lkml.kernel.org/r/20251103180235.71409-2-pratyush@kernel.org Fixes: a667300bd53f ("kho: add support for preserving vmalloc allocations") Signed-off-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Baoquan He Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index 36fdce2667c5..e0bafe7c0ded 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -882,7 +882,8 @@ static struct kho_vmalloc_chunk *new_vmalloc_chunk(struct kho_vmalloc_chunk *cur return NULL; } -static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) +static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk, + unsigned short order) { struct kho_mem_track *track = &kho_out.ser.track; unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); @@ -891,7 +892,7 @@ static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { pfn = PHYS_PFN(chunk->phys[i]); - __kho_unpreserve(track, pfn, pfn + 1); + __kho_unpreserve(track, pfn, pfn + (1 << order)); } } @@ -902,7 +903,7 @@ static void kho_vmalloc_free_chunks(struct kho_vmalloc *kho_vmalloc) while (chunk) { struct kho_vmalloc_chunk *tmp = chunk; - kho_vmalloc_unpreserve_chunk(chunk); + kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order); chunk = KHOSER_LOAD_PTR(chunk->hdr.next); free_page((unsigned long)tmp); From b05addf6f0596edb1f82ab4059438c7ef2d2686d Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Mon, 3 Nov 2025 19:02:32 +0100 Subject: [PATCH 0901/1075] kho: warn and exit when unpreserved page wasn't preserved Calling __kho_unpreserve() on a pair of (pfn, end_pfn) that wasn't preserved is a bug. Currently, if that is done, the physxa or bits can be NULL. This results in a soft lockup since a NULL physxa or bits results in redoing the loop without ever making any progress. Return when physxa or bits are not found, but WARN first to loudly indicate invalid behaviour. Link: https://lkml.kernel.org/r/20251103180235.71409-3-pratyush@kernel.org Fixes: fc33e4b44b27 ("kexec: enable KHO support for memory preservation") Signed-off-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Baoquan He Cc: Pasha Tatashin Cc: Signed-off-by: Andrew Morton --- kernel/kexec_handover.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/kexec_handover.c b/kernel/kexec_handover.c index e0bafe7c0ded..03d12e27189f 100644 --- a/kernel/kexec_handover.c +++ b/kernel/kexec_handover.c @@ -171,12 +171,12 @@ static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, const unsigned long pfn_high = pfn >> order; physxa = xa_load(&track->orders, order); - if (!physxa) - continue; + if (WARN_ON_ONCE(!physxa)) + return; bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS); - if (!bits) - continue; + if (WARN_ON_ONCE(!bits)) + return; clear_bit(pfn_high % PRESERVE_BITS, bits->preserve); From 05a1fc5efdd8560f34a3af39c9cf1e1526cc3ddf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 9 Nov 2025 10:12:07 +0100 Subject: [PATCH 0902/1075] ALSA: usb-audio: Fix potential overflow of PCM transfer buffer The PCM stream data in USB-audio driver is transferred over USB URB packet buffers, and each packet size is determined dynamically. The packet sizes are limited by some factors such as wMaxPacketSize USB descriptor. OTOH, in the current code, the actually used packet sizes are determined only by the rate and the PPS, which may be bigger than the size limit above. This results in a buffer overflow, as reported by syzbot. Basically when the limit is smaller than the calculated packet size, it implies that something is wrong, most likely a weird USB descriptor. So the best option would be just to return an error at the parameter setup time before doing any further operations. This patch introduces such a sanity check, and returns -EINVAL when the packet size is greater than maxpacksize. The comparison with ep->packsize[1] alone should suffice since it's always equal or greater than ep->packsize[0]. Reported-by: syzbot+bfd77469c8966de076f7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=bfd77469c8966de076f7 Link: https://lore.kernel.org/690b6b46.050a0220.3d0d33.0054.GAE@google.com Cc: Lizhi Xu Cc: Link: https://patch.msgid.link/20251109091211.12739-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/endpoint.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 880f5afcce60..cc15624ecaff 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -1362,6 +1362,11 @@ int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, ep->sample_rem = ep->cur_rate % ep->pps; ep->packsize[0] = ep->cur_rate / ep->pps; ep->packsize[1] = (ep->cur_rate + (ep->pps - 1)) / ep->pps; + if (ep->packsize[1] > ep->maxpacksize) { + usb_audio_dbg(chip, "Too small maxpacksize %u for rate %u / pps %u\n", + ep->maxpacksize, ep->cur_rate, ep->pps); + return -EINVAL; + } /* calculate the frequency in 16.16 format */ ep->freqm = ep->freqn; From 66e9feb03e7cf8983b1d0c540e2dad90d5146d48 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 9 Nov 2025 16:53:39 +0100 Subject: [PATCH 0903/1075] spi: Add TODO comment about ACPI GPIO setup Add a TODO comment that ideally the ACPI/gpiolib core code should take care of setting GPIO direction and/or bias according to ACPI GPIO resources. If this TODO gets implemented then the acpi_dev_gpio_irq_get() call in acpi_register_spi_device() can be dropped. Suggested-by: Andy Shevchenko Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20251109155340.26199-1-johannes.goede@oss.qualcomm.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8588e8562220..e25df9990f82 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2857,6 +2857,8 @@ static acpi_status acpi_register_spi_device(struct spi_controller *ctlr, * here too, because this call sets the GPIO direction and/or bias. * Setting these needs to be done even if there is no driver, in which * case spi_probe() will never get called. + * TODO: ideally the setup of the GPIO should be handled in a generic + * manner in the ACPI/gpiolib core code. */ if (spi->irq < 0) spi->irq = acpi_dev_gpio_irq_get(adev, 0); From 4aa17144d5abc3c756883e3a010246f0dba8b468 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 14 Oct 2025 13:59:59 -0400 Subject: [PATCH 0904/1075] NFSD: free copynotify stateid in nfs4_free_ol_stateid() Typically copynotify stateid is freed either when parent's stateid is being close/freed or in nfsd4_laundromat if the stateid hasn't been used in a lease period. However, in case when the server got an OPEN (which created a parent stateid), followed by a COPY_NOTIFY using that stateid, followed by a client reboot. New client instance while doing CREATE_SESSION would force expire previous state of this client. It leads to the open state being freed thru release_openowner-> nfs4_free_ol_stateid() and it finds that it still has copynotify stateid associated with it. We currently print a warning and is triggerred WARNING: CPU: 1 PID: 8858 at fs/nfsd/nfs4state.c:1550 nfs4_free_ol_stateid+0xb0/0x100 [nfsd] This patch, instead, frees the associated copynotify stateid here. If the parent stateid is freed (without freeing the copynotify stateids associated with it), it leads to the list corruption when laundromat ends up freeing the copynotify state later. [ 1626.839430] Internal error: Oops - BUG: 00000000f2000800 [#1] SMP [ 1626.842828] Modules linked in: nfnetlink_queue nfnetlink_log bluetooth cfg80211 rpcrdma rdma_cm iw_cm ib_cm ib_core nfsd nfs_acl lockd grace nfs_localio ext4 crc16 mbcache jbd2 overlay uinput snd_seq_dummy snd_hrtimer qrtr rfkill vfat fat uvcvideo snd_hda_codec_generic videobuf2_vmalloc videobuf2_memops snd_hda_intel uvc snd_intel_dspcfg videobuf2_v4l2 videobuf2_common snd_hda_codec snd_hda_core videodev snd_hwdep snd_seq mc snd_seq_device snd_pcm snd_timer snd soundcore sg loop auth_rpcgss vsock_loopback vmw_vsock_virtio_transport_common vmw_vsock_vmci_transport vmw_vmci vsock xfs 8021q garp stp llc mrp nvme ghash_ce e1000e nvme_core sr_mod nvme_keyring nvme_auth cdrom vmwgfx drm_ttm_helper ttm sunrpc dm_mirror dm_region_hash dm_log iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse dm_multipath dm_mod nfnetlink [ 1626.855594] CPU: 2 UID: 0 PID: 199 Comm: kworker/u24:33 Kdump: loaded Tainted: G B W 6.17.0-rc7+ #22 PREEMPT(voluntary) [ 1626.857075] Tainted: [B]=BAD_PAGE, [W]=WARN [ 1626.857573] Hardware name: VMware, Inc. VMware20,1/VBSA, BIOS VMW201.00V.24006586.BA64.2406042154 06/04/2024 [ 1626.858724] Workqueue: nfsd4 laundromat_main [nfsd] [ 1626.859304] pstate: 61400005 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) [ 1626.860010] pc : __list_del_entry_valid_or_report+0x148/0x200 [ 1626.860601] lr : __list_del_entry_valid_or_report+0x148/0x200 [ 1626.861182] sp : ffff8000881d7a40 [ 1626.861521] x29: ffff8000881d7a40 x28: 0000000000000018 x27: ffff0000c2a98200 [ 1626.862260] x26: 0000000000000600 x25: 0000000000000000 x24: ffff8000881d7b20 [ 1626.862986] x23: ffff0000c2a981e8 x22: 1fffe00012410e7d x21: ffff0000920873e8 [ 1626.863701] x20: ffff0000920873e8 x19: ffff000086f22998 x18: 0000000000000000 [ 1626.864421] x17: 20747562202c3839 x16: 3932326636383030 x15: 3030666666662065 [ 1626.865092] x14: 6220646c756f6873 x13: 0000000000000001 x12: ffff60004fd9e4a3 [ 1626.865713] x11: 1fffe0004fd9e4a2 x10: ffff60004fd9e4a2 x9 : dfff800000000000 [ 1626.866320] x8 : 00009fffb0261b5e x7 : ffff00027ecf2513 x6 : 0000000000000001 [ 1626.866938] x5 : ffff00027ecf2510 x4 : ffff60004fd9e4a3 x3 : 0000000000000000 [ 1626.867553] x2 : 0000000000000000 x1 : ffff000096069640 x0 : 000000000000006d [ 1626.868167] Call trace: [ 1626.868382] __list_del_entry_valid_or_report+0x148/0x200 (P) [ 1626.868876] _free_cpntf_state_locked+0xd0/0x268 [nfsd] [ 1626.869368] nfs4_laundromat+0x6f8/0x1058 [nfsd] [ 1626.869813] laundromat_main+0x24/0x60 [nfsd] [ 1626.870231] process_one_work+0x584/0x1050 [ 1626.870595] worker_thread+0x4c4/0xc60 [ 1626.870893] kthread+0x2f8/0x398 [ 1626.871146] ret_from_fork+0x10/0x20 [ 1626.871422] Code: aa1303e1 aa1403e3 910e8000 97bc55d7 (d4210000) [ 1626.871892] SMP: stopping secondary CPUs Reported-by: rtm@csail.mit.edu Closes: https://lore.kernel.org/linux-nfs/d8f064c1-a26f-4eed-b4f0-1f7f608f415f@oracle.com/T/#t Fixes: 624322f1adc5 ("NFSD add COPY_NOTIFY operation") Cc: stable@vger.kernel.org Signed-off-by: Olga Kornievskaia Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c1b54322c412..de763e6d9b58 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1542,7 +1542,8 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); - WARN_ON(!list_empty(&stid->sc_cp_list)); + if (!list_empty(&stid->sc_cp_list)) + nfs4_free_cpntf_statelist(stid->sc_client->net, stid); kmem_cache_free(stateid_slab, stid); } From ff8141e49cf70d2d093a5228f5299ce188de6142 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Oct 2025 09:49:55 -0400 Subject: [PATCH 0905/1075] NFSD: Skip close replay processing if XDR encoding fails The replay logic added by commit 9411b1d4c7df ("nfsd4: cleanup handling of nfsv4.0 closed stateid's") cannot be done if encoding failed due to a short send buffer; there's no guarantee that the operation encoder has actually encoded the data that is being copied to the replay cache. Reported-by: rtm@csail.mit.edu Closes: https://lore.kernel.org/linux-nfs/c3628d57-94ae-48cf-8c9e-49087a28cec9@oracle.com/T/#t Fixes: 9411b1d4c7df ("nfsd4: cleanup handling of nfsv4.0 closed stateid's") Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6040a6145dad..cf5df0f50208 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5925,8 +5925,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) */ warn_on_nonidempotent_op(op); xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT); - } - if (so) { + } else if (so) { int len = xdr->buf->len - (op_status_offset + XDR_UNIT); so->so_replay.rp_status = op->status; From c96573c0d75db3f8478000d0d392a9cdb95adbed Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Oct 2025 09:49:56 -0400 Subject: [PATCH 0906/1075] NFSD: Never cache a COMPOUND when the SEQUENCE operation fails RFC 8881 normatively mandates that operations where the initial SEQUENCE operation in a compound fails must not modify the slot's replay cache. nfsd4_cache_this() doesn't prevent such caching. So when SEQUENCE fails, cstate.data_offset is not set, allowing read_bytes_from_xdr_buf() to access uninitialized memory. Reported-by: rtm@csail.mit.edu Closes: https://lore.kernel.org/linux-nfs/c3628d57-94ae-48cf-8c9e-49087a28cec9@oracle.com/T/#t Fixes: 468de9e54a90 ("nfsd41: expand solo sequence check") Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index de763e6d9b58..c8c326679dca 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3487,7 +3487,20 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) struct nfsd4_slot *slot = resp->cstate.slot; unsigned int base; - dprintk("--> %s slot %p\n", __func__, slot); + /* + * RFC 5661 Section 2.10.6.1.2: + * + * Any time SEQUENCE ... returns an error ... [t]he replier MUST NOT + * modify the reply cache entry for the slot whenever an error is + * returned from SEQUENCE ... + * + * Because nfsd4_store_cache_entry is called only by + * nfsd4_sequence_done(), nfsd4_store_cache_entry() is called only + * when a SEQUENCE operation was part of the COMPOUND. + * nfs41_check_op_ordering() ensures SEQUENCE is the first op. + */ + if (resp->opcnt == 1 && resp->cstate.status != nfs_ok) + return; slot->sl_flags |= NFSD4_SLOT_INITIALIZED; slot->sl_opcnt = resp->opcnt; From 1cff14b7fc7f31363c39d0269563ce75c714f7ae Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 16 Oct 2025 09:49:57 -0400 Subject: [PATCH 0907/1075] nfsd: ensure SEQUENCE replay sends a valid reply. nfsd4_enc_sequence_replay() uses nfsd4_encode_operation() to encode a new SEQUENCE reply when replaying a request from the slot cache - only ops after the SEQUENCE are replayed from the cache in ->sl_data. However it does this in nfsd4_replay_cache_entry() which is called *before* nfsd4_sequence() has filled in reply fields. This means that in the replayed SEQUENCE reply: maxslots will be whatever the client sent target_maxslots will be -1 (assuming init to zero, and nfsd4_encode_sequence() subtracts 1) status_flags will be zero The incorrect maxslots value, in particular, can cause the client to think the slot table has been reduced in size so it can discard its knowledge of current sequence number of the later slots, though the server has not discarded those slots. When the client later wants to use a later slot, it can get NFS4ERR_SEQ_MISORDERED from the server. This patch moves the setup of the reply into a new helper function and call it *before* nfsd4_replay_cache_entry() is called. Only one of the updated fields was used after this point - maxslots. So the nfsd4_sequence struct has been extended to have separate maxslots for the request and the response. Reported-by: Olga Kornievskaia Closes: https://lore.kernel.org/linux-nfs/20251010194449.10281-1-okorniev@redhat.com/ Tested-by: Olga Kornievskaia Signed-off-by: NeilBrown Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4state.c | 50 ++++++++++++++++++++++++++++++--------------- fs/nfsd/nfs4xdr.c | 2 +- fs/nfsd/xdr4.h | 3 ++- 3 files changed, 36 insertions(+), 19 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index c8c326679dca..8a6960500217 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4363,6 +4363,36 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, return true; } +/* + * Note that the response is constructed here both for the case + * of a new SEQUENCE request and for a replayed SEQUENCE request. + * We do not cache SEQUENCE responses as SEQUENCE is idempotent. + */ +static void nfsd4_construct_sequence_response(struct nfsd4_session *session, + struct nfsd4_sequence *seq) +{ + struct nfs4_client *clp = session->se_client; + + seq->maxslots_response = max(session->se_target_maxslots, + seq->maxslots); + seq->target_maxslots = session->se_target_maxslots; + + switch (clp->cl_cb_state) { + case NFSD4_CB_DOWN: + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; + break; + case NFSD4_CB_FAULT: + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; + break; + default: + seq->status_flags = 0; + } + if (!list_empty(&clp->cl_revoked)) + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; + if (atomic_read(&clp->cl_admin_revoked)) + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -4412,6 +4442,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, dprintk("%s: slotid %d\n", __func__, seq->slotid); trace_nfsd_slot_seqid_sequence(clp, seq, slot); + + nfsd4_construct_sequence_response(session, seq); + status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); if (status == nfserr_replay_cache) { status = nfserr_seq_misordered; @@ -4509,23 +4542,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } out: - seq->maxslots = max(session->se_target_maxslots, seq->maxslots); - seq->target_maxslots = session->se_target_maxslots; - - switch (clp->cl_cb_state) { - case NFSD4_CB_DOWN: - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; - break; - case NFSD4_CB_FAULT: - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; - break; - default: - seq->status_flags = 0; - } - if (!list_empty(&clp->cl_revoked)) - seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; - if (atomic_read(&clp->cl_admin_revoked)) - seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; trace_nfsd_seq4_status(rqstp, seq); out_no_session: if (conn) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index cf5df0f50208..67bb9c0b9fcb 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -5073,7 +5073,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, return nfserr; /* Note slotid's are numbered from zero: */ /* sr_highest_slotid */ - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots_response - 1); if (nfserr != nfs_ok) return nfserr; /* sr_target_highest_slotid */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index ee0570cbdd9e..1ce8e12ae335 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -574,8 +574,9 @@ struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ u32 slotid; /* request/response */ - u32 maxslots; /* request/response */ + u32 maxslots; /* request */ u32 cachethis; /* request */ + u32 maxslots_response; /* response */ u32 target_maxslots; /* response */ u32 status_flags; /* response */ }; From 324be6dcbf09133a322db16977a84fbb45c16129 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 16 Oct 2025 11:09:13 -0400 Subject: [PATCH 0908/1075] Revert "SUNRPC: Make RPCSEC_GSS_KRB5 select CRYPTO instead of depending on it" Geert reports: > This is now commit d8e97cc476e33037 ("SUNRPC: Make RPCSEC_GSS_KRB5 > select CRYPTO instead of depending on it") in v6.18-rc1. > As RPCSEC_GSS_KRB5 defaults to "y", CRYPTO is now auto-enabled in > defconfigs that didn't enable it before. Revert while we work out a proper solution and then test it. Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-nfs/b97cea29-4ab7-4fb6-85ba-83f9830e524f@kernel.org/T/#t Signed-off-by: Chuck Lever --- net/sunrpc/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 33aafdc8392e..2d8b67dac7b5 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,9 @@ config SUNRPC_SWAP config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism" - depends on SUNRPC + depends on SUNRPC && CRYPTO default y select SUNRPC_GSS - select CRYPTO select CRYPTO_SKCIPHER select CRYPTO_HASH help From 576c930e5e7dcb937648490611a83f1bf0171048 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 7 Nov 2025 18:12:14 +0100 Subject: [PATCH 0909/1075] drm/panthor: Flush shmem writes before mapping buffers CPU-uncached The shmem layer zeroes out the new pages using cached mappings, and if we don't CPU-flush we might leave dirty cachelines behind, leading to potential data leaks and/or asynchronous buffer corruption when dirty cachelines are evicted. Fixes: 8a1cc07578bf ("drm/panthor: Add GEM logical block") Signed-off-by: Boris Brezillon Reviewed-by: Steven Price Reviewed-by: Liviu Dudau Signed-off-by: Steven Price Link: https://patch.msgid.link/20251107171214.1186299-1-boris.brezillon@collabora.com --- drivers/gpu/drm/panthor/panthor_gem.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c index 156c7a0b62a2..3f43686f0195 100644 --- a/drivers/gpu/drm/panthor/panthor_gem.c +++ b/drivers/gpu/drm/panthor/panthor_gem.c @@ -288,6 +288,23 @@ panthor_gem_create_with_handle(struct drm_file *file, panthor_gem_debugfs_set_usage_flags(bo, 0); + /* If this is a write-combine mapping, we query the sgt to force a CPU + * cache flush (dma_map_sgtable() is called when the sgt is created). + * This ensures the zero-ing is visible to any uncached mapping created + * by vmap/mmap. + * FIXME: Ideally this should be done when pages are allocated, not at + * BO creation time. + */ + if (shmem->map_wc) { + struct sg_table *sgt; + + sgt = drm_gem_shmem_get_pages_sgt(shmem); + if (IS_ERR(sgt)) { + ret = PTR_ERR(sgt); + goto out_put_gem; + } + } + /* * Allocate an id of idr table where the obj is registered * and handle has the id what user can see. @@ -296,6 +313,7 @@ panthor_gem_create_with_handle(struct drm_file *file, if (!ret) *size = bo->base.base.size; +out_put_gem: /* drop reference from allocate - handle holds it now. */ drm_gem_object_put(&shmem->base); From 994dec10991b53beac3e16109d876ae363e8a329 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 6 Nov 2025 22:00:00 +0200 Subject: [PATCH 0910/1075] drm/i915/psr: fix pipe to vblank conversion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First, we can't assume pipe == crtc index. If a pipe is fused off in between, it no longer holds. intel_crtc_for_pipe() is the only proper way to get from a pipe to the corresponding crtc. Second, drivers aren't supposed to access or index drm->vblank[] directly. There's drm_crtc_vblank_crtc() for this. Use both functions to fix the pipe to vblank conversion. Fixes: f02658c46cf7 ("drm/i915/psr: Add mechanism to notify PSR of pipe enable/disable") Cc: Jouni Högander Cc: stable@vger.kernel.org # v6.16+ Reviewed-by: Jouni Högander Link: https://patch.msgid.link/20251106200000.1455164-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 2750f6765d6974f7e163c5d540a96c8703f6d8dd) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_psr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index 10eb93a34cf2..d5e0a1e66944 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -888,7 +888,8 @@ static bool is_dc5_dc6_blocked(struct intel_dp *intel_dp) { struct intel_display *display = to_intel_display(intel_dp); u32 current_dc_state = intel_display_power_get_current_dc_state(display); - struct drm_vblank_crtc *vblank = &display->drm->vblank[intel_dp->psr.pipe]; + struct intel_crtc *crtc = intel_crtc_for_pipe(display, intel_dp->psr.pipe); + struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(&crtc->base); return (current_dc_state != DC_STATE_EN_UPTO_DC5 && current_dc_state != DC_STATE_EN_UPTO_DC6) || From 7aca00d950e782e66c34fbd045c9605eca343a36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Oct 2025 20:10:33 -0400 Subject: [PATCH 0911/1075] pnfs: Fix TLS logic in _nfs4_pnfs_v3_ds_connect() Don't try to add an RDMA transport to a client that is already marked as being a TCP/TLS transport. Fixes: 04a15263662a ("pnfs/flexfiles: connect to NFSv3 DS using TLS if MDS connection uses TLS") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7b32afb29782..ff48056bf750 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -809,8 +809,11 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, unsigned int retrans) { struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs_client *mds_clp = mds_srv->nfs_client; + enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy; struct nfs4_pnfs_ds_addr *da; unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10; + int ds_proto; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -834,27 +837,28 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, .xprtsec = clp->cl_xprtsec, }; - if (da->da_transport != clp->cl_proto && - clp->cl_proto != XPRT_TRANSPORT_TCP_TLS) - continue; - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS) + if (xprt_args.ident == XPRT_TRANSPORT_TCP && + clp->cl_proto == XPRT_TRANSPORT_TCP_TLS) xprt_args.ident = XPRT_TRANSPORT_TCP_TLS; - if (da->da_addr.ss_family != clp->cl_addr.ss_family) + if (xprt_args.ident != clp->cl_proto) + continue; + if (xprt_args.dstaddr->sa_family != + clp->cl_addr.ss_family) continue; /* Add this address as an alias */ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, - rpc_clnt_test_and_add_xprt, NULL); + rpc_clnt_test_and_add_xprt, NULL); continue; } - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == XPRT_TRANSPORT_TCP_TLS) - da->da_transport = XPRT_TRANSPORT_TCP_TLS; - clp = get_v3_ds_connect(mds_srv, - &da->da_addr, - da->da_addrlen, da->da_transport, - timeo, retrans); + + ds_proto = da->da_transport; + if (ds_proto == XPRT_TRANSPORT_TCP && + xprtsec_policy != RPC_XPRTSEC_NONE) + ds_proto = XPRT_TRANSPORT_TCP_TLS; + + clp = get_v3_ds_connect(mds_srv, &da->da_addr, da->da_addrlen, + ds_proto, timeo, retrans); if (IS_ERR(clp)) continue; clp->cl_rpcclient->cl_softerr = 0; From 28e19737e1570c7c71890547c2e43c3e0da79df9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Oct 2025 20:10:34 -0400 Subject: [PATCH 0912/1075] pnfs: Fix TLS logic in _nfs4_pnfs_v4_ds_connect() Don't try to add an RDMA transport to a client that is already marked as being a TCP/TLS transport. Fixes: a35518cae4b3 ("NFSv4.1/pnfs: fix NFS with TLS in pnfs") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index ff48056bf750..9976cc16b689 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -884,7 +884,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, u32 minor_version) { struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs_client *mds_clp = mds_srv->nfs_client; + enum xprtsec_policies xprtsec_policy = mds_clp->cl_xprtsec.policy; struct nfs4_pnfs_ds_addr *da; + int ds_proto; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -912,12 +915,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, .data = &xprtdata, }; - if (da->da_transport != clp->cl_proto && - clp->cl_proto != XPRT_TRANSPORT_TCP_TLS) - continue; - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == - XPRT_TRANSPORT_TCP_TLS) { + if (xprt_args.ident == XPRT_TRANSPORT_TCP && + clp->cl_proto == XPRT_TRANSPORT_TCP_TLS) { struct sockaddr *addr = (struct sockaddr *)&da->da_addr; struct sockaddr_in *sin = @@ -948,7 +947,10 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, xprt_args.ident = XPRT_TRANSPORT_TCP_TLS; xprt_args.servername = servername; } - if (da->da_addr.ss_family != clp->cl_addr.ss_family) + if (xprt_args.ident != clp->cl_proto) + continue; + if (xprt_args.dstaddr->sa_family != + clp->cl_addr.ss_family) continue; /** @@ -962,15 +964,14 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, if (xprtdata.cred) put_cred(xprtdata.cred); } else { - if (da->da_transport == XPRT_TRANSPORT_TCP && - mds_srv->nfs_client->cl_proto == - XPRT_TRANSPORT_TCP_TLS) - da->da_transport = XPRT_TRANSPORT_TCP_TLS; - clp = nfs4_set_ds_client(mds_srv, - &da->da_addr, - da->da_addrlen, - da->da_transport, timeo, - retrans, minor_version); + ds_proto = da->da_transport; + if (ds_proto == XPRT_TRANSPORT_TCP && + xprtsec_policy != RPC_XPRTSEC_NONE) + ds_proto = XPRT_TRANSPORT_TCP_TLS; + + clp = nfs4_set_ds_client(mds_srv, &da->da_addr, + da->da_addrlen, ds_proto, + timeo, retrans, minor_version); if (IS_ERR(clp)) continue; @@ -981,7 +982,6 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, clp = ERR_PTR(-EIO); continue; } - } } From 8ab523ce78d4ca13add6b4ecbacff0f84c274603 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Oct 2025 20:10:35 -0400 Subject: [PATCH 0913/1075] pnfs: Set transport security policy to RPC_XPRTSEC_NONE unless using TLS The default setting for the transport security policy must be RPC_XPRTSEC_NONE, when using a TCP or RDMA connection without TLS. Conversely, when using TLS, the security policy needs to be set. Fixes: 6c0a8c5fcf71 ("NFS: Have struct nfs_client carry a TLS policy field") Signed-off-by: Trond Myklebust Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfs3client.c | 14 ++++++++++++-- fs/nfs/nfs4client.c | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 0d7310c1ee0c..5d97c1d38bb6 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "internal.h" #include "nfs3_fs.h" #include "netns.h" @@ -98,7 +99,11 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, - .xprtsec = mds_clp->cl_xprtsec, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + .cert_serial = TLS_NO_CERT, + .privkey_serial = TLS_NO_PRIVKEY, + }, .connect_timeout = connect_timeout, .reconnect_timeout = connect_timeout, }; @@ -111,9 +116,14 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_TCP_TLS: + if (mds_clp->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + cl_init.xprtsec = mds_clp->cl_xprtsec; + else + ds_proto = XPRT_TRANSPORT_TCP; + fallthrough; case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) cl_init.nconnect = mds_clp->cl_nconnect; } diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 5998d6bd8a4f..3a4baed993c9 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "internal.h" #include "callback.h" #include "delegation.h" @@ -983,7 +984,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, .net = mds_clp->cl_net, .timeparms = &ds_timeout, .cred = mds_srv->cred, - .xprtsec = mds_srv->nfs_client->cl_xprtsec, + .xprtsec = { + .policy = RPC_XPRTSEC_NONE, + .cert_serial = TLS_NO_CERT, + .privkey_serial = TLS_NO_PRIVKEY, + }, }; char buf[INET6_ADDRSTRLEN + 1]; @@ -992,9 +997,14 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, cl_init.hostname = buf; switch (ds_proto) { + case XPRT_TRANSPORT_TCP_TLS: + if (mds_srv->nfs_client->cl_xprtsec.policy != RPC_XPRTSEC_NONE) + cl_init.xprtsec = mds_srv->nfs_client->cl_xprtsec; + else + ds_proto = XPRT_TRANSPORT_TCP; + fallthrough; case XPRT_TRANSPORT_RDMA: case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_TCP_TLS: if (mds_clp->cl_nconnect > 1) { cl_init.nconnect = mds_clp->cl_nconnect; cl_init.max_connect = NFS_MAX_TRANSPORTS; From fb2cba0854a7f315c8100a807a6959b99d72479e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 18 Oct 2025 20:10:36 -0400 Subject: [PATCH 0914/1075] NFS: Check the TLS certificate fields in nfs_match_client() If the TLS security policy is of type RPC_XPRTSEC_TLS_X509, then the cert_serial and privkey_serial fields need to match as well since they define the client's identity, as presented to the server. Fixes: 90c9550a8d65 ("NFS: support the kernel keyring for TLS") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4e3dcc157a83..54699299d5b1 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -338,6 +338,14 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat /* Match the xprt security policy */ if (clp->cl_xprtsec.policy != data->xprtsec.policy) continue; + if (clp->cl_xprtsec.policy == RPC_XPRTSEC_TLS_X509) { + if (clp->cl_xprtsec.cert_serial != + data->xprtsec.cert_serial) + continue; + if (clp->cl_xprtsec.privkey_serial != + data->xprtsec.privkey_serial) + continue; + } refcount_inc(&clp->cl_count); return clp; From 51a491f2708de79da76791523d40926921823b7e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 27 Oct 2025 09:08:31 -0400 Subject: [PATCH 0915/1075] nfs/localio: remove unecessary ENOTBLK handling in DIO WRITE support Each filesystem is meant to fallback to retrying DIO in terms buffered IO when it might encounter -ENOTBLK when issuing DIO (which can happen if the VFS cannot invalidate the page cache). So NFS doesn't need special handling for -ENOTBLK. Also, explicitly initialize a couple DIO related iocb members rather than simply rely on data structure zeroing. Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/localio.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 2c0455e91571..0383d6eb2f46 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -315,6 +315,7 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, iocb->hdr = hdr; iocb->kiocb.ki_flags &= ~IOCB_APPEND; + iocb->kiocb.ki_complete = NULL; iocb->aio_complete_work = NULL; iocb->end_iter_index = -1; @@ -484,6 +485,7 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) /* Use buffered IO */ iocb->offset[0] = hdr->args.offset; iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); + iocb->iter_is_dio_aligned[0] = false; iocb->n_iters = 1; } @@ -803,7 +805,7 @@ static void nfs_local_call_write(struct work_struct *work) iocb->kiocb.ki_complete = nfs_local_write_aio_complete; iocb->aio_complete_work = nfs_local_write_aio_complete_work; } -retry: + iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { @@ -823,15 +825,6 @@ static void nfs_local_call_write(struct work_struct *work) nfs_local_pgio_done(iocb->hdr, status); break; } - } else if (unlikely(status == -ENOTBLK && - (iocb->kiocb.ki_flags & IOCB_DIRECT))) { - /* VFS will return -ENOTBLK if DIO WRITE fails to - * invalidate the page cache. Retry using buffered IO. - */ - iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - iocb->kiocb.ki_complete = NULL; - iocb->aio_complete_work = NULL; - goto retry; } nfs_local_pgio_done(iocb->hdr, status); if (iocb->hdr->task.tk_status) From f2060bdc21d70f3d8a4753a9fd3b0b02cb48c0bc Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 27 Oct 2025 09:08:32 -0400 Subject: [PATCH 0916/1075] nfs/localio: add refcounting for each iocb IO associated with NFS pgio header Improve completion handling of as many as 3 IOs associated with each misaligned DIO by using a atomic_t to track completion of each IO. Update nfs_local_pgio_done() to use precise atomic_t accounting for remaining iov_iter (up to 3) associated with each iocb, so that each NFS LOCALIO pgio header is only released after all IOs have completed. But also allow early return if/when a short read or write occurs. Fixes reported BUG: KASAN: slab-use-after-free in nfs_local_call_read: https://lore.kernel.org/linux-nfs/aPSvi5Yr2lGOh5Jh@dell-per750-06-vm-07.rhts.eng.pek2.redhat.com/ Reported-by: Yongcheng Yang Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/localio.c | 110 +++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 43 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 0383d6eb2f46..647fa19b0479 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -42,7 +42,7 @@ struct nfs_local_kiocb { /* Begin mostly DIO-specific members */ size_t end_len; short int end_iter_index; - short int n_iters; + atomic_t n_iters; bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; loff_t offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned; struct iov_iter iters[NFSLOCAL_MAX_IOS]; @@ -407,6 +407,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, iters[n_iters].count = local_dio->start_len; iocb->offset[n_iters] = iocb->hdr->args.offset; iocb->iter_is_dio_aligned[n_iters] = false; + atomic_inc(&iocb->n_iters); ++n_iters; } @@ -425,6 +426,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, /* Save index and length of end */ iocb->end_iter_index = n_iters; iocb->end_len = local_dio->end_len; + atomic_inc(&iocb->n_iters); ++n_iters; } @@ -448,7 +450,6 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, } ++n_iters; - iocb->n_iters = n_iters; return n_iters; } @@ -474,6 +475,12 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) } len = hdr->args.count - total; + /* + * For each iocb, iocb->n_iter is always at least 1 and we always + * end io after first nfs_local_pgio_done call unless misaligned DIO. + */ + atomic_set(&iocb->n_iters, 1); + if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { struct nfs_local_dio local_dio; @@ -486,7 +493,6 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) iocb->offset[0] = hdr->args.offset; iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); iocb->iter_is_dio_aligned[0] = false; - iocb->n_iters = 1; } static void @@ -506,9 +512,11 @@ nfs_local_pgio_init(struct nfs_pgio_header *hdr, hdr->task.tk_start = ktime_get(); } -static void -nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) +static bool +nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force) { + struct nfs_pgio_header *hdr = iocb->hdr; + /* Must handle partial completions */ if (status >= 0) { hdr->res.count += status; @@ -519,6 +527,12 @@ nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status) hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); hdr->task.tk_status = status; } + + if (force) + return true; + + BUG_ON(atomic_read(&iocb->n_iters) <= 0); + return atomic_dec_and_test(&iocb->n_iters); } static void @@ -549,11 +563,11 @@ static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) queue_work(nfsiod_workqueue, &iocb->work); } -static void -nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) +static void nfs_local_read_done(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; struct file *filp = iocb->kiocb.ki_filp; + long status = hdr->task.tk_status; if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ @@ -574,12 +588,18 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status) status > 0 ? status : 0, hdr->res.eof); } +static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb) +{ + nfs_local_read_done(iocb); + nfs_local_pgio_release(iocb); +} + static void nfs_local_read_aio_complete_work(struct work_struct *work) { struct nfs_local_kiocb *iocb = container_of(work, struct nfs_local_kiocb, work); - nfs_local_pgio_release(iocb); + nfs_local_read_iocb_done(iocb); } static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) @@ -587,8 +607,10 @@ static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); - nfs_local_pgio_done(iocb->hdr, ret); - nfs_local_read_done(iocb, ret); + /* AIO completion of DIO read should always be last to complete */ + if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) + return; + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ } @@ -599,10 +621,13 @@ static void nfs_local_call_read(struct work_struct *work) struct file *filp = iocb->kiocb.ki_filp; const struct cred *save_cred; ssize_t status; + int n_iters; save_cred = override_creds(filp->f_cred); - for (int i = 0; i < iocb->n_iters ; i++) { + n_iters = atomic_read(&iocb->n_iters); + for (int i = 0; i < n_iters ; i++) { + /* DIO-aligned middle is always issued last with AIO completion */ if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; iocb->kiocb.ki_complete = nfs_local_read_aio_complete; @@ -612,18 +637,14 @@ static void nfs_local_call_read(struct work_struct *work) iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - nfs_local_pgio_done(iocb->hdr, status); - if (iocb->hdr->task.tk_status) + if (nfs_local_pgio_done(iocb, status, false)) { + nfs_local_read_iocb_done(iocb); break; + } } } revert_creds(save_cred); - - if (status != -EIOCBQUEUED) { - nfs_local_read_done(iocb, status); - nfs_local_pgio_release(iocb); - } } static int @@ -738,11 +759,10 @@ static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb) fattr->du.nfs3.used = stat.blocks << 9; } -static void -nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) +static void nfs_local_write_done(struct nfs_local_kiocb *iocb) { struct nfs_pgio_header *hdr = iocb->hdr; - struct inode *inode = hdr->inode; + long status = hdr->task.tk_status; dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); @@ -761,10 +781,17 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status) nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); status = -ENOSPC; /* record -ENOSPC in terms of nfs_local_pgio_done */ - nfs_local_pgio_done(hdr, status); + (void) nfs_local_pgio_done(iocb, status, true); } if (hdr->task.tk_status < 0) - nfs_reset_boot_verifier(inode); + nfs_reset_boot_verifier(hdr->inode); +} + +static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb) +{ + nfs_local_write_done(iocb); + nfs_local_vfs_getattr(iocb); + nfs_local_pgio_release(iocb); } static void nfs_local_write_aio_complete_work(struct work_struct *work) @@ -772,8 +799,7 @@ static void nfs_local_write_aio_complete_work(struct work_struct *work) struct nfs_local_kiocb *iocb = container_of(work, struct nfs_local_kiocb, work); - nfs_local_vfs_getattr(iocb); - nfs_local_pgio_release(iocb); + nfs_local_write_iocb_done(iocb); } static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) @@ -781,8 +807,10 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) struct nfs_local_kiocb *iocb = container_of(kiocb, struct nfs_local_kiocb, kiocb); - nfs_local_pgio_done(iocb->hdr, ret); - nfs_local_write_done(iocb, ret); + /* AIO completion of DIO write should always be last to complete */ + if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) + return; + nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ } @@ -793,13 +821,17 @@ static void nfs_local_call_write(struct work_struct *work) struct file *filp = iocb->kiocb.ki_filp; unsigned long old_flags = current->flags; const struct cred *save_cred; + bool force_done = false; ssize_t status; + int n_iters; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; save_cred = override_creds(filp->f_cred); file_start_write(filp); - for (int i = 0; i < iocb->n_iters ; i++) { + n_iters = atomic_read(&iocb->n_iters); + for (int i = 0; i < n_iters ; i++) { + /* DIO-aligned middle is always issued last with AIO completion */ if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; iocb->kiocb.ki_complete = nfs_local_write_aio_complete; @@ -812,35 +844,27 @@ static void nfs_local_call_write(struct work_struct *work) if (unlikely(status >= 0 && status < iocb->iters[i].count)) { /* partial write */ if (i == iocb->end_iter_index) { - /* Must not account partial end, otherwise, due - * to end being issued before middle: the partial + /* Must not account DIO partial end, otherwise (due + * to end being issued before middle): the partial * write accounting in nfs_local_write_done() * would incorrectly advance hdr->args.offset */ status = 0; } else { - /* Partial write at start or buffered middle, - * exit early. - */ - nfs_local_pgio_done(iocb->hdr, status); - break; + /* Partial write at start or middle, force done */ + force_done = true; } } - nfs_local_pgio_done(iocb->hdr, status); - if (iocb->hdr->task.tk_status) + if (nfs_local_pgio_done(iocb, status, force_done)) { + nfs_local_write_iocb_done(iocb); break; + } } } file_end_write(filp); revert_creds(save_cred); current->flags = old_flags; - - if (status != -EIOCBQUEUED) { - nfs_local_write_done(iocb, status); - nfs_local_vfs_getattr(iocb); - nfs_local_pgio_release(iocb); - } } static int From d0497dd27452c79a48414df813a16cd12d274b3b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 27 Oct 2025 09:08:33 -0400 Subject: [PATCH 0917/1075] nfs/localio: backfill missing partial read support for misaligned DIO Misaligned DIO read can be split into 3 IOs, must handle potential for short read from each component IO (follows same pattern used for handling partial writes, except upper layer read code handles advancing offset before retry). Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/localio.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 647fa19b0479..9c205f8b5e59 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -414,7 +414,7 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, /* Setup misaligned end? * If so, the end is purposely setup to be issued using buffered IO * before the middle (which will use DIO, if DIO-aligned, with AIO). - * This creates problems if/when the end results in a partial write. + * This creates problems if/when the end results in short read or write. * So must save index and length of end to handle this corner case. */ if (local_dio->end_len) { @@ -580,8 +580,9 @@ static void nfs_local_read_done(struct nfs_local_kiocb *iocb) */ hdr->res.replen = 0; - if (hdr->res.count != hdr->args.count || - hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) + /* nfs_readpage_result() handles short read */ + + if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) hdr->res.eof = true; dprintk("%s: read %ld bytes eof %d.\n", __func__, @@ -620,6 +621,7 @@ static void nfs_local_call_read(struct work_struct *work) container_of(work, struct nfs_local_kiocb, work); struct file *filp = iocb->kiocb.ki_filp; const struct cred *save_cred; + bool force_done = false; ssize_t status; int n_iters; @@ -637,7 +639,21 @@ static void nfs_local_call_read(struct work_struct *work) iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - if (nfs_local_pgio_done(iocb, status, false)) { + if (unlikely(status >= 0 && status < iocb->iters[i].count)) { + /* partial read */ + if (i == iocb->end_iter_index) { + /* Must not account DIO partial end, otherwise (due + * to end being issued before middle): the partial + * read accounting in nfs_local_read_done() + * would incorrectly advance hdr->args.offset + */ + status = 0; + } else { + /* Partial read at start or middle, force done */ + force_done = true; + } + } + if (nfs_local_pgio_done(iocb, status, force_done)) { nfs_local_read_iocb_done(iocb); break; } From d32ddfeb559342e89a4d06b1df4e7e5e96df3762 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 27 Oct 2025 13:52:28 -0400 Subject: [PATCH 0918/1075] nfs/localio: Ensure DIO WRITE's IO on stable storage upon completion LOCALIO's misaligned DIO WRITE support requires synchronous IO for any misaligned head and/or tail that are issued using buffered IO. In addition, it is important that the O_DIRECT middle be on stable storage upon its completion via AIO. Otherwise, a misaligned DIO WRITE could mix buffered IO for the head/tail and direct IO for the DIO-aligned middle -- which could lead to problems associated with deferred writes to stable storage (such as out of order partial completions causing incorrect advancement of the file's offset, etc). Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/localio.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 9c205f8b5e59..839dbda0b370 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -485,8 +485,12 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) struct nfs_local_dio local_dio; if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) && - nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) + nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) { + /* Ensure DIO WRITE's IO on stable storage upon completion */ + if (rw == ITER_SOURCE) + iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; return; /* is DIO-aligned */ + } } /* Use buffered IO */ From eb2d6774cc0d9d6ab8f924825695a85c14b2e0c2 Mon Sep 17 00:00:00 2001 From: Niranjan H Y Date: Mon, 10 Nov 2025 20:56:46 +0530 Subject: [PATCH 0919/1075] ASoC: SDCA: bug fix while parsing mipi-sdca-control-cn-list "struct sdca_control" declares "values" field as integer array. But the memory allocated to it is of char array. This causes crash for sdca_parse_function API. This patch addresses the issue by allocating correct data size. Signed-off-by: Niranjan H Y Reviewed-by: Charles Keepax Link: https://patch.msgid.link/20251110152646.192-1-niranjan.hy@ti.com Signed-off-by: Mark Brown --- sound/soc/sdca/sdca_functions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/sdca/sdca_functions.c b/sound/soc/sdca/sdca_functions.c index 13f68f7b6dd6..0ccb6775f4de 100644 --- a/sound/soc/sdca/sdca_functions.c +++ b/sound/soc/sdca/sdca_functions.c @@ -894,7 +894,8 @@ static int find_sdca_entity_control(struct device *dev, struct sdca_entity *enti return ret; } - control->values = devm_kzalloc(dev, hweight64(control->cn_list), GFP_KERNEL); + control->values = devm_kcalloc(dev, hweight64(control->cn_list), + sizeof(int), GFP_KERNEL); if (!control->values) return -ENOMEM; From 0b2f7be548006b0651e1e8320790f49723265cbc Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Mon, 27 Oct 2025 14:56:43 +0530 Subject: [PATCH 0920/1075] drm/xe/xe3: Add WA_14024681466 for Xe3_LPG Apply WA_14024681466 to Xe3_LPG graphics IP versions from 30.00 to 30.05. v2: (Matthew Roper) - Remove stepping filter as workaround applies to all steppings. - Add an engine class filter so it only applies to the RENDER engine. Signed-off-by: Nitin Gote Link: https://patch.msgid.link/20251027092643.335904-1-nitin.r.gote@intel.com Reviewed-by: Matt Roper Signed-off-by: Matt Roper (cherry picked from commit 071089a69e199bd810ff31c4c933bd528e502743) Cc: stable@vger.kernel.org # v6.16+ Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/regs/xe_gt_regs.h | 1 + drivers/gpu/drm/xe/xe_wa.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 51f2a03847f9..f680c8b8f258 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -168,6 +168,7 @@ #define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED) #define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14) +#define FAST_CLEAR_VALIGN_FIX REG_BIT(13) #define XE2LPM_CCCHKNREG1 XE_REG(0x82a8) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index cd03891654a1..c33719e2e0df 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -916,6 +916,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3003), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE)) }, + { XE_RTP_NAME("14024681466"), + XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { From fa3376319b83ba8b7fd55f2c1a268dcbf9d6eedc Mon Sep 17 00:00:00 2001 From: Tangudu Tilak Tirumalesh Date: Thu, 30 Oct 2025 21:16:26 +0530 Subject: [PATCH 0921/1075] drm/xe/xe3: Extend wa_14023061436 Extend wa_14023061436 to Graphics Versions 30.03, 30.04 and 30.05. Signed-off-by: Tangudu Tilak Tirumalesh Reviewed-by: Matt Roper Link: https://patch.msgid.link/20251030154626.3124565-1-tilak.tirumalesh.tangudu@intel.com Signed-off-by: Matt Roper (cherry picked from commit 0dd656d06f50ae4cedf160634cf13fd9e0944cf7) Cc: stable@vger.kernel.org # v6.17+ Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c33719e2e0df..917b97317d11 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -679,6 +679,8 @@ static const struct xe_rtp_entry_sr engine_was[] = { }, { XE_RTP_NAME("14023061436"), XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), + FUNC(xe_rtp_match_first_render_or_compute), OR, + GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE)) }, From 240372edaf854c9136f5ead45f2d8cd9496a9cb3 Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Thu, 6 Nov 2025 15:35:17 +0530 Subject: [PATCH 0922/1075] drm/xe/xe3lpg: Extend Wa_15016589081 for xe3lpg Wa_15016589081 applies to Xe3_LPG renderCS Signed-off-by: Nitin Gote Link: https://patch.msgid.link/20251106100516.318863-2-nitin.r.gote@intel.com Signed-off-by: Matt Roper (cherry picked from commit 715974499a2199bd199fb4630501f55545342ea4) Cc: stable@vger.kernel.org # v6.16+ Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_wa.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 917b97317d11..3cf30718b200 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -922,6 +922,11 @@ static const struct xe_rtp_entry_sr lrc_was[] = { XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, }; static __maybe_unused const struct xe_rtp_entry oob_was[] = { From 6a218b9c3183ed19d5703130025282cf20463d87 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 5 Nov 2025 22:03:04 -0500 Subject: [PATCH 0923/1075] nfs/localio: do not issue misaligned DIO out-of-order From https://lore.kernel.org/linux-nfs/aQHASIumLJyOoZGH@infradead.org/ On Wed, Oct 29, 2025 at 12:20:40AM -0700, Christoph Hellwig wrote: > On Mon, Oct 27, 2025 at 12:18:30PM -0400, Mike Snitzer wrote: > > LOCALIO's misaligned DIO will issue head/tail followed by O_DIRECT > > middle (via AIO completion of that aligned middle). So out of order > > relative to file offset. > > That's in general a really bad idea. It will obviously work, but > both on SSDs and out of place write file systems it is a sure way > to increase your garbage collection overhead a lot down the line. Fix this by never issuing misaligned DIO out of order. This fix means the DIO-aligned middle will only use AIO completion if there is no misaligned end segment. Otherwise, all 3 segments of a misaligned DIO will be issued without AIO completion to ensure file offset increases properly for all partial READ or WRITE situations. Factoring out nfs_local_iter_setup() helps standardize repetitive nfs_local_iters_setup_dio() code and is inspired by cleanup work that Chuck Lever did on the NFSD Direct code. Fixes: c817248fc831 ("nfs/localio: add proper O_DIRECT support for READ and WRITE") Reported-by: Christoph Hellwig Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/localio.c | 128 +++++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 76 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 839dbda0b370..656976b4f42c 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -44,8 +44,7 @@ struct nfs_local_kiocb { short int end_iter_index; atomic_t n_iters; bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; - loff_t offset[NFSLOCAL_MAX_IOS] ____cacheline_aligned; - struct iov_iter iters[NFSLOCAL_MAX_IOS]; + struct iov_iter iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned; /* End mostly DIO-specific members */ }; @@ -314,6 +313,7 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, init_sync_kiocb(&iocb->kiocb, file); iocb->hdr = hdr; + iocb->kiocb.ki_pos = hdr->args.offset; iocb->kiocb.ki_flags &= ~IOCB_APPEND; iocb->kiocb.ki_complete = NULL; iocb->aio_complete_work = NULL; @@ -389,13 +389,24 @@ static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i, return true; } +static void +nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec, + unsigned int nvecs, unsigned long total, + size_t start, size_t len) +{ + iov_iter_bvec(iter, rw, bvec, nvecs, total); + if (start) + iov_iter_advance(iter, start); + iov_iter_truncate(iter, len); +} + /* * Setup as many as 3 iov_iter based on extents described by @local_dio. * Returns the number of iov_iter that were setup. */ static int nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, - unsigned int nvecs, size_t len, + unsigned int nvecs, unsigned long total, struct nfs_local_dio *local_dio) { int n_iters = 0; @@ -403,41 +414,17 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, /* Setup misaligned start? */ if (local_dio->start_len) { - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - iters[n_iters].count = local_dio->start_len; - iocb->offset[n_iters] = iocb->hdr->args.offset; - iocb->iter_is_dio_aligned[n_iters] = false; - atomic_inc(&iocb->n_iters); + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, + nvecs, total, 0, local_dio->start_len); ++n_iters; } - /* Setup misaligned end? - * If so, the end is purposely setup to be issued using buffered IO - * before the middle (which will use DIO, if DIO-aligned, with AIO). - * This creates problems if/when the end results in short read or write. - * So must save index and length of end to handle this corner case. + /* + * Setup DIO-aligned middle, if there is no misaligned end (below) + * then AIO completion is used, see nfs_local_call_{read,write} */ - if (local_dio->end_len) { - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - iocb->offset[n_iters] = local_dio->end_offset; - iov_iter_advance(&iters[n_iters], - local_dio->start_len + local_dio->middle_len); - iocb->iter_is_dio_aligned[n_iters] = false; - /* Save index and length of end */ - iocb->end_iter_index = n_iters; - iocb->end_len = local_dio->end_len; - atomic_inc(&iocb->n_iters); - ++n_iters; - } - - /* Setup DIO-aligned middle to be issued last, to allow for - * DIO with AIO completion (see nfs_local_call_{read,write}). - */ - iov_iter_bvec(&iters[n_iters], rw, iocb->bvec, nvecs, len); - if (local_dio->start_len) - iov_iter_advance(&iters[n_iters], local_dio->start_len); - iters[n_iters].count -= local_dio->end_len; - iocb->offset[n_iters] = local_dio->middle_offset; + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs, + total, local_dio->start_len, local_dio->middle_len); iocb->iter_is_dio_aligned[n_iters] = nfs_iov_iter_aligned_bvec(&iters[n_iters], @@ -445,11 +432,22 @@ nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) { trace_nfs_local_dio_misaligned(iocb->hdr->inode, - iocb->hdr->args.offset, len, local_dio); + local_dio->start_len, local_dio->middle_len, local_dio); return 0; /* no DIO-aligned IO possible */ } + iocb->end_iter_index = n_iters; ++n_iters; + /* Setup misaligned end? */ + if (local_dio->end_len) { + nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, + nvecs, total, local_dio->start_len + + local_dio->middle_len, local_dio->end_len); + iocb->end_iter_index = n_iters; + ++n_iters; + } + + atomic_set(&iocb->n_iters, n_iters); return n_iters; } @@ -476,7 +474,7 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) len = hdr->args.count - total; /* - * For each iocb, iocb->n_iter is always at least 1 and we always + * For each iocb, iocb->n_iters is always at least 1 and we always * end io after first nfs_local_pgio_done call unless misaligned DIO. */ atomic_set(&iocb->n_iters, 1); @@ -494,9 +492,7 @@ nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) } /* Use buffered IO */ - iocb->offset[0] = hdr->args.offset; iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); - iocb->iter_is_dio_aligned[0] = false; } static void @@ -633,30 +629,20 @@ static void nfs_local_call_read(struct work_struct *work) n_iters = atomic_read(&iocb->n_iters); for (int i = 0; i < n_iters ; i++) { - /* DIO-aligned middle is always issued last with AIO completion */ if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; - iocb->kiocb.ki_complete = nfs_local_read_aio_complete; - iocb->aio_complete_work = nfs_local_read_aio_complete_work; - } + /* Only use AIO completion if DIO-aligned segment is last */ + if (i == iocb->end_iter_index) { + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } + } else + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - if (unlikely(status >= 0 && status < iocb->iters[i].count)) { - /* partial read */ - if (i == iocb->end_iter_index) { - /* Must not account DIO partial end, otherwise (due - * to end being issued before middle): the partial - * read accounting in nfs_local_read_done() - * would incorrectly advance hdr->args.offset - */ - status = 0; - } else { - /* Partial read at start or middle, force done */ - force_done = true; - } - } + if (unlikely(status >= 0 && status < iocb->iters[i].count)) + force_done = true; /* Partial read */ if (nfs_local_pgio_done(iocb, status, force_done)) { nfs_local_read_iocb_done(iocb); break; @@ -851,30 +837,20 @@ static void nfs_local_call_write(struct work_struct *work) file_start_write(filp); n_iters = atomic_read(&iocb->n_iters); for (int i = 0; i < n_iters ; i++) { - /* DIO-aligned middle is always issued last with AIO completion */ if (iocb->iter_is_dio_aligned[i]) { iocb->kiocb.ki_flags |= IOCB_DIRECT; - iocb->kiocb.ki_complete = nfs_local_write_aio_complete; - iocb->aio_complete_work = nfs_local_write_aio_complete_work; - } + /* Only use AIO completion if DIO-aligned segment is last */ + if (i == iocb->end_iter_index) { + iocb->kiocb.ki_complete = nfs_local_write_aio_complete; + iocb->aio_complete_work = nfs_local_write_aio_complete_work; + } + } else + iocb->kiocb.ki_flags &= ~IOCB_DIRECT; - iocb->kiocb.ki_pos = iocb->offset[i]; status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); if (status != -EIOCBQUEUED) { - if (unlikely(status >= 0 && status < iocb->iters[i].count)) { - /* partial write */ - if (i == iocb->end_iter_index) { - /* Must not account DIO partial end, otherwise (due - * to end being issued before middle): the partial - * write accounting in nfs_local_write_done() - * would incorrectly advance hdr->args.offset - */ - status = 0; - } else { - /* Partial write at start or middle, force done */ - force_done = true; - } - } + if (unlikely(status >= 0 && status < iocb->iters[i].count)) + force_done = true; /* Partial write */ if (nfs_local_pgio_done(iocb, status, force_done)) { nfs_local_write_iocb_done(iocb); break; From 85d2c2392ac6348e1171d627497034a341a250c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 28 Oct 2025 17:27:43 -0400 Subject: [PATCH 0924/1075] NFSv2/v3: Fix error handling in nfs_atomic_open_v23() When nfs_do_create() returns an EEXIST error, it means that a regular file could not be created. That could mean that a symlink needs to be resolved. If that's the case, a lookup needs to be kicked off. Reported-by: Stephen Abbene Link: https://bugzilla.kernel.org/show_bug.cgi?id=220710 Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.") Signed-off-by: Trond Myklebust Reviewed-by: NeilBrown Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 46d9c65d50f8..ea9f6ca8f30f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2268,11 +2268,12 @@ int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry, return -ENAMETOOLONG; if (open_flags & O_CREAT) { - file->f_mode |= FMODE_CREATED; error = nfs_do_create(dir, dentry, mode, open_flags); - if (error) + if (!error) { + file->f_mode |= FMODE_CREATED; + return finish_open(file, dentry, NULL); + } else if (error != -EEXIST || open_flags & O_EXCL) return error; - return finish_open(file, dentry, NULL); } if (d_in_lookup(dentry)) { /* The only flags nfs_lookup considers are From 7a7a3456520b309a0bffa1d9d62bd6c9dcab89b3 Mon Sep 17 00:00:00 2001 From: Yang Xiuwei Date: Thu, 30 Oct 2025 11:03:25 +0800 Subject: [PATCH 0925/1075] NFS: sysfs: fix leak when nfs_client kobject add fails If adding the second kobject fails, drop both references to avoid sysfs residue and memory leak. Fixes: e96f9268eea6 ("NFS: Make all of /sys/fs/nfs network-namespace unique") Signed-off-by: Yang Xiuwei Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/sysfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 545148d42dcc..ea6e6168092b 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -189,6 +189,7 @@ static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, return p; kobject_put(&p->kobject); + kobject_put(&p->nfs_net_kobj); } return NULL; } From 1f214e9c3aef2d0936be971072e991d78a174d71 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 31 Oct 2025 10:51:42 -0400 Subject: [PATCH 0926/1075] NFSv4: Fix an incorrect parameter when calling nfs4_call_sync() The Smatch static checker noted that in _nfs4_proc_lookupp(), the flag RPC_TASK_TIMEOUT is being passed as an argument to nfs4_init_sequence(), which is clearly incorrect. Since LOOKUPP is an idempotent operation, nfs4_init_sequence() should not ask the server to cache the result. The RPC_TASK_TIMEOUT flag needs to be passed down to the RPC layer. Reported-by: Dan Carpenter Reported-by: Harshit Mogalapalli Fixes: 76998ebb9158 ("NFSv4: Observe the NFS_MOUNT_SOFTREVAL flag in _nfs4_proc_lookupp") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 411776718494..93c6ce04332b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4715,16 +4715,19 @@ static int _nfs4_proc_lookupp(struct inode *inode, }; unsigned short task_flags = 0; - if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + if (server->flags & NFS_MOUNT_SOFTREVAL) task_flags |= RPC_TASK_TIMEOUT; + if (server->caps & NFS_CAP_MOVEABLE) + task_flags |= RPC_TASK_MOVEABLE; args.bitmask = nfs4_bitmask(server, fattr->label); nfs_fattr_init(fattr); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, task_flags); + status = nfs4_do_call_sync(clnt, server, &msg, &args.seq_args, + &res.seq_res, task_flags); dprintk("NFS reply lookupp: %d\n", status); return status; } From 55fb52ffdd62850d667ebed842815e072d3c9961 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Sun, 2 Nov 2025 20:16:12 +0200 Subject: [PATCH 0927/1075] Bluetooth: MGMT: cancel mesh send timer when hdev removed mesh_send_done timer is not canceled when hdev is removed, which causes crash if the timer triggers after hdev is gone. Cancel the timer when MGMT removes the hdev, like other MGMT timers. Should fix the BUG: sporadically seen by BlueZ test bot (in "Mesh - Send cancel - 1" test). Log: ------ BUG: KASAN: slab-use-after-free in run_timer_softirq+0x76b/0x7d0 ... Freed by task 36: kasan_save_stack+0x24/0x50 kasan_save_track+0x14/0x30 __kasan_save_free_info+0x3a/0x60 __kasan_slab_free+0x43/0x70 kfree+0x103/0x500 device_release+0x9a/0x210 kobject_put+0x100/0x1e0 vhci_release+0x18b/0x240 ------ Fixes: b338d91703fa ("Bluetooth: Implement support for Mesh") Link: https://lore.kernel.org/linux-bluetooth/67364c09.0c0a0220.113cba.39ff@mx.google.com/ Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/mgmt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 79762bfaea5f..262bf984d2aa 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -9497,6 +9497,7 @@ void mgmt_index_removed(struct hci_dev *hdev) cancel_delayed_work_sync(&hdev->discov_off); cancel_delayed_work_sync(&hdev->service_cache); cancel_delayed_work_sync(&hdev->rpa_expired); + cancel_delayed_work_sync(&hdev->mesh_send_done); } void mgmt_power_on(struct hci_dev *hdev, int err) From 23d22f2f71768034d6ef86168213843fc49bf550 Mon Sep 17 00:00:00 2001 From: Raphael Pinsonneault-Thibeault Date: Wed, 5 Nov 2025 14:28:41 -0500 Subject: [PATCH 0928/1075] Bluetooth: btusb: reorder cleanup in btusb_disconnect to avoid UAF There is a KASAN: slab-use-after-free read in btusb_disconnect(). Calling "usb_driver_release_interface(&btusb_driver, data->intf)" will free the btusb data associated with the interface. The same data is then used later in the function, hence the UAF. Fix by moving the accesses to btusb data to before the data is free'd. Reported-by: syzbot+2fc81b50a4f8263a159b@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=2fc81b50a4f8263a159b Tested-by: syzbot+2fc81b50a4f8263a159b@syzkaller.appspotmail.com Fixes: fd913ef7ce619 ("Bluetooth: btusb: Add out-of-band wakeup support") Signed-off-by: Raphael Pinsonneault-Thibeault Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btusb.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 5e9ebf0c5312..a722446ec73d 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -4361,6 +4361,11 @@ static void btusb_disconnect(struct usb_interface *intf) hci_unregister_dev(hdev); + if (data->oob_wake_irq) + device_init_wakeup(&data->udev->dev, false); + if (data->reset_gpio) + gpiod_put(data->reset_gpio); + if (intf == data->intf) { if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); @@ -4371,17 +4376,11 @@ static void btusb_disconnect(struct usb_interface *intf) usb_driver_release_interface(&btusb_driver, data->diag); usb_driver_release_interface(&btusb_driver, data->intf); } else if (intf == data->diag) { - usb_driver_release_interface(&btusb_driver, data->intf); if (data->isoc) usb_driver_release_interface(&btusb_driver, data->isoc); + usb_driver_release_interface(&btusb_driver, data->intf); } - if (data->oob_wake_irq) - device_init_wakeup(&data->udev->dev, false); - - if (data->reset_gpio) - gpiod_put(data->reset_gpio); - hci_free_dev(hdev); } From 3b78f50918276ab28fb22eac9aa49401ac436a3b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 3 Nov 2025 20:29:46 +0200 Subject: [PATCH 0929/1075] Bluetooth: 6lowpan: reset link-local header on ipv6 recv path Bluetooth 6lowpan.c netdev has header_ops, so it must set link-local header for RX skb, otherwise things crash, eg. with AF_PACKET SOCK_RAW Add missing skb_reset_mac_header() for uncompressed ipv6 RX path. For the compressed one, it is done in lowpan_header_decompress(). Log: (BlueZ 6lowpan-tester Client Recv Raw - Success) ------ kernel BUG at net/core/skbuff.c:212! Call Trace: ... packet_rcv (net/packet/af_packet.c:2152) ... __local_bh_enable_ip (kernel/softirq.c:407) netif_rx (net/core/dev.c:5648) chan_recv_cb (net/bluetooth/6lowpan.c:294 net/bluetooth/6lowpan.c:359) ------ Fixes: 18722c247023 ("Bluetooth: Enable 6LoWPAN support for BT LE devices") Reviewed-by: Paul Menzel Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index f0c862091bff..f1d29fa4b411 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -289,6 +289,7 @@ static int recv_pkt(struct sk_buff *skb, struct net_device *dev, local_skb->pkt_type = PACKET_HOST; local_skb->dev = dev; + skb_reset_mac_header(local_skb); skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { From b454505bf57a2e4f5d49951d4deb03730a9348d9 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 3 Nov 2025 20:29:47 +0200 Subject: [PATCH 0930/1075] Bluetooth: 6lowpan: fix BDADDR_LE vs ADDR_LE_DEV address type confusion Bluetooth 6lowpan.c confuses BDADDR_LE and ADDR_LE_DEV address types, e.g. debugfs "connect" command takes the former, and "disconnect" and "connect" to already connected device take the latter. This is due to using same value both for l2cap_chan_connect and hci_conn_hash_lookup_le which take different dst_type values. Fix address type passed to hci_conn_hash_lookup_le(). Retain the debugfs API difference between "connect" and "disconnect" commands since it's been like this since 2015 and nobody apparently complained. Fixes: f5ad4ffceba0 ("Bluetooth: 6lowpan: Use hci_conn_hash_lookup_le() when possible") Reviewed-by: Paul Menzel Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index f1d29fa4b411..0d8c2e2e9a6c 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -957,10 +957,11 @@ static struct l2cap_chan *bt_6lowpan_listen(void) } static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, - struct l2cap_conn **conn) + struct l2cap_conn **conn, bool disconnect) { struct hci_conn *hcon; struct hci_dev *hdev; + int le_addr_type; int n; n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", @@ -971,13 +972,32 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, if (n < 7) return -EINVAL; + if (disconnect) { + /* The "disconnect" debugfs command has used different address + * type constants than "connect" since 2015. Let's retain that + * for now even though it's obviously buggy... + */ + *addr_type += 1; + } + + switch (*addr_type) { + case BDADDR_LE_PUBLIC: + le_addr_type = ADDR_LE_DEV_PUBLIC; + break; + case BDADDR_LE_RANDOM: + le_addr_type = ADDR_LE_DEV_RANDOM; + break; + default: + return -EINVAL; + } + /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); if (!hdev) return -ENOENT; hci_dev_lock(hdev); - hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); + hcon = hci_conn_hash_lookup_le(hdev, addr, le_addr_type); hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -1104,7 +1124,7 @@ static ssize_t lowpan_control_write(struct file *fp, buf[buf_size] = '\0'; if (memcmp(buf, "connect ", 8) == 0) { - ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn, false); if (ret == -EINVAL) return ret; @@ -1141,7 +1161,7 @@ static ssize_t lowpan_control_write(struct file *fp, } if (memcmp(buf, "disconnect ", 11) == 0) { - ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); + ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn, true); if (ret < 0) return ret; From e060088db0bdf7932e0e3c2d24b7371c4c5b867c Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 3 Nov 2025 20:29:48 +0200 Subject: [PATCH 0931/1075] Bluetooth: L2CAP: export l2cap_chan_hold for modules l2cap_chan_put() is exported, so export also l2cap_chan_hold() for modules. l2cap_chan_hold() has use case in net/bluetooth/6lowpan.c Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/l2cap_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d08320380ad6..35c57657bcf4 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -497,6 +497,7 @@ void l2cap_chan_hold(struct l2cap_chan *c) kref_get(&c->kref); } +EXPORT_SYMBOL_GPL(l2cap_chan_hold); struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) { From 98454bc812f3611551e4b1f81732da4aa7b9597e Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 3 Nov 2025 20:29:49 +0200 Subject: [PATCH 0932/1075] Bluetooth: 6lowpan: Don't hold spin lock over sleeping functions disconnect_all_peers() calls sleeping function (l2cap_chan_close) under spinlock. Holding the lock doesn't actually do any good -- we work on a local copy of the list, and the lock doesn't protect against peer->chan having already been freed. Fix by taking refcounts of peer->chan instead. Clean up the code and old comments a bit. Take devices_lock instead of RCU, because the kfree_rcu(); l2cap_chan_put(); construct in chan_close_cb() does not guarantee peer->chan is necessarily valid in RCU. Also take l2cap_chan_lock() which is required for l2cap_chan_close(). Log: (bluez 6lowpan-tester Client Connect - Disable) ------ BUG: sleeping function called from invalid context at kernel/locking/mutex.c:575 ... ... l2cap_send_disconn_req (net/bluetooth/l2cap_core.c:938 net/bluetooth/l2cap_core.c:1495) ... ? __pfx_l2cap_chan_close (net/bluetooth/l2cap_core.c:809) do_enable_set (net/bluetooth/6lowpan.c:1048 net/bluetooth/6lowpan.c:1068) ------ Fixes: 90305829635d ("Bluetooth: 6lowpan: Converting rwlocks to use RCU") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 74 +++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 28 deletions(-) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 0d8c2e2e9a6c..588d7e94e606 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -53,6 +53,11 @@ static bool enable_6lowpan; static struct l2cap_chan *listen_chan; static DEFINE_MUTEX(set_lock); +enum { + LOWPAN_PEER_CLOSING, + LOWPAN_PEER_MAXBITS +}; + struct lowpan_peer { struct list_head list; struct rcu_head rcu; @@ -61,6 +66,8 @@ struct lowpan_peer { /* peer addresses in various formats */ unsigned char lladdr[ETH_ALEN]; struct in6_addr peer_addr; + + DECLARE_BITMAP(flags, LOWPAN_PEER_MAXBITS); }; struct lowpan_btle_dev { @@ -1014,41 +1021,52 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, static void disconnect_all_peers(void) { struct lowpan_btle_dev *entry; - struct lowpan_peer *peer, *tmp_peer, *new_peer; - struct list_head peers; + struct lowpan_peer *peer; + int nchans; - INIT_LIST_HEAD(&peers); - - /* We make a separate list of peers as the close_cb() will - * modify the device peers list so it is better not to mess - * with the same list at the same time. + /* l2cap_chan_close() cannot be called from RCU, and lock ordering + * chan->lock > devices_lock prevents taking write side lock, so copy + * then close. */ rcu_read_lock(); - - list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { - list_for_each_entry_rcu(peer, &entry->peers, list) { - new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); - if (!new_peer) - break; - - new_peer->chan = peer->chan; - INIT_LIST_HEAD(&new_peer->list); - - list_add(&new_peer->list, &peers); - } - } - + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) + list_for_each_entry_rcu(peer, &entry->peers, list) + clear_bit(LOWPAN_PEER_CLOSING, peer->flags); rcu_read_unlock(); - spin_lock(&devices_lock); - list_for_each_entry_safe(peer, tmp_peer, &peers, list) { - l2cap_chan_close(peer->chan, ENOENT); + do { + struct l2cap_chan *chans[32]; + int i; - list_del_rcu(&peer->list); - kfree_rcu(peer, rcu); - } - spin_unlock(&devices_lock); + nchans = 0; + + spin_lock(&devices_lock); + + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { + list_for_each_entry_rcu(peer, &entry->peers, list) { + if (test_and_set_bit(LOWPAN_PEER_CLOSING, + peer->flags)) + continue; + + l2cap_chan_hold(peer->chan); + chans[nchans++] = peer->chan; + + if (nchans >= ARRAY_SIZE(chans)) + goto done; + } + } + +done: + spin_unlock(&devices_lock); + + for (i = 0; i < nchans; ++i) { + l2cap_chan_lock(chans[i]); + l2cap_chan_close(chans[i], ENOENT); + l2cap_chan_unlock(chans[i]); + l2cap_chan_put(chans[i]); + } + } while (nchans); } struct set_enable { From 15f32cabf426143ec1d2c8faf2bbca40c0fbd61b Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 3 Nov 2025 20:29:50 +0200 Subject: [PATCH 0933/1075] Bluetooth: 6lowpan: add missing l2cap_chan_lock() l2cap_chan_close() needs to be called in l2cap_chan_lock(), otherwise l2cap_le_sig_cmd() etc. may run concurrently. Add missing locks around l2cap_chan_close(). Fixes: 6b8d4a6a0314 ("Bluetooth: 6LoWPAN: Use connected oriented channel instead of fixed one") Signed-off-by: Pauli Virtanen Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/6lowpan.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 588d7e94e606..2c21ae8abadc 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -927,7 +927,9 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type) BT_DBG("peer %p chan %p", peer, peer->chan); + l2cap_chan_lock(peer->chan); l2cap_chan_close(peer->chan, ENOENT); + l2cap_chan_unlock(peer->chan); return 0; } @@ -1089,7 +1091,9 @@ static void do_enable_set(struct work_struct *work) mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } @@ -1148,7 +1152,9 @@ static ssize_t lowpan_control_write(struct file *fp, mutex_lock(&set_lock); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); listen_chan = NULL; } @@ -1310,7 +1316,9 @@ static void __exit bt_6lowpan_exit(void) debugfs_remove(lowpan_control_debugfs); if (listen_chan) { + l2cap_chan_lock(listen_chan); l2cap_chan_close(listen_chan, 0); + l2cap_chan_unlock(listen_chan); l2cap_chan_put(listen_chan); } From 41bf23338a501e745c398e0faee948dd05d0be98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 4 Nov 2025 17:02:04 -0500 Subject: [PATCH 0934/1075] Bluetooth: hci_conn: Fix not cleaning up PA_LINK connections Contrary to what was stated on d36349ea73d8 ("Bluetooth: hci_conn: Fix running bis_cleanup for hci_conn->type PA_LINK") the PA_LINK does in fact needs to run bis_cleanup in order to terminate the PA Sync, since that is bond to the listening socket which is the entity that controls the lifetime of PA Sync, so if it is closed/released the PA Sync shall be terminated, terminating the PA Sync shall not result in the BIG Sync being terminated since once the later is established it doesn't depend on the former anymore. If the use user wants to reconnect/rebind a number of BIS(s) it shall keep the socket open until it no longer needs the PA Sync, which means it retains full control of the lifetime of both PA and BIG Syncs. Fixes: d36349ea73d8 ("Bluetooth: hci_conn: Fix running bis_cleanup for hci_conn->type PA_LINK") Fixes: a7bcffc673de ("Bluetooth: Add PA_LINK to distinguish BIG sync and PA sync connections") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_conn.c | 33 +++++++++++++++++++-------------- net/bluetooth/hci_event.c | 7 +------ net/bluetooth/hci_sync.c | 2 +- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index c5dedf39a129..6fc0692abf05 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -769,21 +769,23 @@ static void find_bis(struct hci_conn *conn, void *data) d->count++; } -static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) +static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) { struct iso_list_data *d; int ret; - bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle); + bt_dev_dbg(hdev, "hcon %p big 0x%2.2x sync_handle 0x%4.4x", conn, + conn->iso_qos.bcast.big, conn->sync_handle); d = kzalloc(sizeof(*d), GFP_KERNEL); if (!d) return -ENOMEM; - d->big = big; + d->big = conn->iso_qos.bcast.big; d->sync_handle = conn->sync_handle; - if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { + if (conn->type == PA_LINK && + test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { hci_conn_hash_list_flag(hdev, find_bis, PA_LINK, HCI_CONN_PA_SYNC, d); @@ -801,6 +803,9 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *c d->big_sync_term = true; } + if (!d->pa_sync_term && !d->big_sync_term) + return 0; + ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, terminate_big_destroy); if (ret) @@ -852,8 +857,7 @@ static void bis_cleanup(struct hci_conn *conn) hci_le_terminate_big(hdev, conn); } else { - hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, - conn); + hci_le_big_terminate(hdev, conn); } } @@ -994,19 +998,20 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; break; case CIS_LINK: - case BIS_LINK: - case PA_LINK: /* conn->src should reflect the local identity address */ hci_copy_identity_address(hdev, &conn->src, &conn->src_type); - /* set proper cleanup function */ - if (!bacmp(dst, BDADDR_ANY)) - conn->cleanup = bis_cleanup; - else if (conn->role == HCI_ROLE_MASTER) + if (conn->role == HCI_ROLE_MASTER) conn->cleanup = cis_cleanup; - conn->mtu = hdev->iso_mtu ? hdev->iso_mtu : - hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; + conn->mtu = hdev->iso_mtu; + break; + case PA_LINK: + case BIS_LINK: + /* conn->src should reflect the local identity address */ + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); + conn->cleanup = bis_cleanup; + conn->mtu = hdev->iso_mtu; break; case SCO_LINK: if (lmp_esco_capable(hdev)) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f20c826509b6..03328c1dd090 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -7001,14 +7001,9 @@ static void hci_le_big_sync_established_evt(struct hci_dev *hdev, void *data, continue; } - if (ev->status != 0x42) { + if (ev->status != 0x42) /* Mark PA sync as established */ set_bit(HCI_CONN_PA_SYNC, &bis->flags); - /* Reset cleanup callback of PA Sync so it doesn't - * terminate the sync when deleting the connection. - */ - conn->cleanup = NULL; - } bis->sync_handle = conn->sync_handle; bis->iso_qos.bcast.big = ev->handle; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 73fc41b68b68..6e76798ec786 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6999,7 +6999,7 @@ static void create_pa_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); - if (!hci_conn_valid(hdev, conn)) + if (hci_conn_valid(hdev, conn)) clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); if (!err) From b623390045a81fc559decb9bfeb79319721d3dfb Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Sun, 9 Nov 2025 09:05:08 -0800 Subject: [PATCH 0935/1075] NFS: Fix LTP test failures when timestamps are delegated The utimes01 and utime06 tests fail when delegated timestamps are enabled, specifically in subtests that modify the atime and mtime fields using the 'nobody' user ID. The problem can be reproduced as follow: # echo "/media *(rw,no_root_squash,sync)" >> /etc/exports # export -ra # mount -o rw,nfsvers=4.2 127.0.0.1:/media /tmpdir # cd /opt/ltp # ./runltp -d /tmpdir -s utimes01 # ./runltp -d /tmpdir -s utime06 This issue occurs because nfs_setattr does not verify the inode's UID against the caller's fsuid when delegated timestamps are permitted for the inode. This patch adds the UID check and if it does not match then the request is sent to the server for permission checking. Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes") Signed-off-by: Dai Ngo Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 18b57c7c2f97..13ad70fc00d8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -718,6 +718,8 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct nfs_fattr *fattr; loff_t oldsize = i_size_read(inode); int error = 0; + kuid_t task_uid = current_fsuid(); + kuid_t owner_uid = inode->i_uid; nfs_inc_stats(inode, NFSIOS_VFSSETATTR); @@ -739,9 +741,11 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { spin_lock(&inode->i_lock); if (attr->ia_valid & ATTR_MTIME_SET) { - nfs_set_timestamps_to_ts(inode, attr); - attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| + if (uid_eq(task_uid, owner_uid)) { + nfs_set_timestamps_to_ts(inode, attr); + attr->ia_valid &= ~(ATTR_MTIME|ATTR_MTIME_SET| ATTR_ATIME|ATTR_ATIME_SET); + } } else { nfs_update_timestamps(inode, attr->ia_valid); attr->ia_valid &= ~(ATTR_MTIME|ATTR_ATIME); @@ -751,10 +755,12 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid & ATTR_ATIME && !(attr->ia_valid & ATTR_MTIME)) { if (attr->ia_valid & ATTR_ATIME_SET) { - spin_lock(&inode->i_lock); - nfs_set_timestamps_to_ts(inode, attr); - spin_unlock(&inode->i_lock); - attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + if (uid_eq(task_uid, owner_uid)) { + spin_lock(&inode->i_lock); + nfs_set_timestamps_to_ts(inode, attr); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_ATIME|ATTR_ATIME_SET); + } } else { nfs_update_delegated_atime(inode); attr->ia_valid &= ~ATTR_ATIME; From d3c9c213c0b86ac5dd8fe2c53c24db20f1f510bc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Nov 2025 14:30:41 -0700 Subject: [PATCH 0936/1075] io_uring/rw: ensure allocated iovec gets cleared for early failure A previous commit reused the recyling infrastructure for early cleanup, but this is not enough for the case where our internal caches have overflowed. If this happens, then the allocated iovec can get leaked if the request is also aborted early. Reinstate the previous forced free of the iovec for that situation. Cc: stable@vger.kernel.org Reported-by: syzbot+3c93637d7648c24e1fd0@syzkaller.appspotmail.com Tested-by: syzbot+3c93637d7648c24e1fd0@syzkaller.appspotmail.com Fixes: 9ac273ae3dc2 ("io_uring/rw: use io_rw_recycle() from cleanup path") Link: https://lore.kernel.org/io-uring/69122a59.a70a0220.22f260.00fd.GAE@google.com/ Signed-off-by: Jens Axboe --- io_uring/rw.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/io_uring/rw.c b/io_uring/rw.c index 5b2241a5813c..abe68ba9c9dc 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -463,7 +463,10 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) void io_readv_writev_cleanup(struct io_kiocb *req) { + struct io_async_rw *rw = req->async_data; + lockdep_assert_held(&req->ctx->uring_lock); + io_vec_free(&rw->vec); io_rw_recycle(req, 0); } From 6a77267d97b5b6cd0e35099ab4eb054e5f965ee6 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 10 Nov 2025 13:03:53 +0000 Subject: [PATCH 0937/1075] io_uring/query: return number of available queries It's useful to know which query opcodes are available. Extend the structure and return that. It's a trivial change, and even though it can be painlessly extended later, it'd still require adding a v2 of the structure. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/uapi/linux/io_uring/query.h | 3 +++ io_uring/query.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/include/uapi/linux/io_uring/query.h b/include/uapi/linux/io_uring/query.h index 5d754322a27c..3539ccbfd064 100644 --- a/include/uapi/linux/io_uring/query.h +++ b/include/uapi/linux/io_uring/query.h @@ -36,6 +36,9 @@ struct io_uring_query_opcode { __u64 enter_flags; /* Bitmask of all supported IOSQE_* flags */ __u64 sqe_flags; + /* The number of available query opcodes */ + __u32 nr_query_opcodes; + __u32 __pad; }; #endif diff --git a/io_uring/query.c b/io_uring/query.c index 645301bd2c82..cf02893ba911 100644 --- a/io_uring/query.c +++ b/io_uring/query.c @@ -20,6 +20,8 @@ static ssize_t io_query_ops(void *data) e->ring_setup_flags = IORING_SETUP_FLAGS; e->enter_flags = IORING_ENTER_FLAGS; e->sqe_flags = SQE_VALID_FLAGS; + e->nr_query_opcodes = __IO_URING_QUERY_MAX; + e->__pad = 0; return sizeof(*e); } From 1534ff77757e44bcc4b98d0196bc5c0052fce5fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 6 Nov 2025 11:10:54 +0000 Subject: [PATCH 0938/1075] sctp: prevent possible shift-out-of-bounds in sctp_transport_update_rto syzbot reported a possible shift-out-of-bounds [1] Blamed commit added rto_alpha_max and rto_beta_max set to 1000. It is unclear if some sctp users are setting very large rto_alpha and/or rto_beta. In order to prevent user regression, perform the test at run time. Also add READ_ONCE() annotations as sysctl values can change under us. [1] UBSAN: shift-out-of-bounds in net/sctp/transport.c:509:41 shift exponent 64 is too large for 32-bit type 'unsigned int' CPU: 0 UID: 0 PID: 16704 Comm: syz.2.2320 Not tainted syzkaller #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/02/2025 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x16c/0x1f0 lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:233 [inline] __ubsan_handle_shift_out_of_bounds+0x27f/0x420 lib/ubsan.c:494 sctp_transport_update_rto.cold+0x1c/0x34b net/sctp/transport.c:509 sctp_check_transmitted+0x11c4/0x1c30 net/sctp/outqueue.c:1502 sctp_outq_sack+0x4ef/0x1b20 net/sctp/outqueue.c:1338 sctp_cmd_process_sack net/sctp/sm_sideeffect.c:840 [inline] sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1372 [inline] Fixes: b58537a1f562 ("net: sctp: fix permissions for rto_alpha and rto_beta knobs") Reported-by: syzbot+f8c46c8b2b7f6e076e99@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/690c81ae.050a0220.3d0d33.014e.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Acked-by: Xin Long Link: https://patch.msgid.link/20251106111054.3288127-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sctp/transport.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 0d48c61fe6ad..0c56d9673cc1 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -486,6 +486,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) if (tp->rttvar || tp->srtt) { struct net *net = tp->asoc->base.net; + unsigned int rto_beta, rto_alpha; /* 6.3.1 C3) When a new RTT measurement R' is made, set * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' @@ -497,10 +498,14 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt) * For example, assuming the default value of RTO.Alpha of * 1/8, rto_alpha would be expressed as 3. */ - tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) - + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); - tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) - + (rtt >> net->sctp.rto_alpha); + rto_beta = READ_ONCE(net->sctp.rto_beta); + if (rto_beta < 32) + tp->rttvar = tp->rttvar - (tp->rttvar >> rto_beta) + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> rto_beta); + rto_alpha = READ_ONCE(net->sctp.rto_alpha); + if (rto_alpha < 32) + tp->srtt = tp->srtt - (tp->srtt >> rto_alpha) + + (rtt >> rto_alpha); } else { /* 6.3.1 C2) When the first RTT measurement R is made, set * SRTT <- R, RTTVAR <- R/2. From e781122d76f018ad17752ab1018b3ffbf7fad84e Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 6 Nov 2025 17:56:20 -0300 Subject: [PATCH 0939/1075] net/sched: Abort __tc_modify_qdisc if parent is a clsact/ingress qdisc Wang reported an illegal configuration [1] where the user attempts to add a child qdisc to the ingress qdisc as follows: tc qdisc add dev eth0 handle ffff:0 ingress tc qdisc add dev eth0 handle ffe0:0 parent ffff:a fq To solve this, we reject any configuration attempt to add a child qdisc to ingress or clsact. [1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Fixes: 5e50da01d0ce ("[NET_SCHED]: Fix endless loops (part 2): "simple" qdiscs") Reported-by: Wang Liang Closes: https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Reviewed-by: Cong Wang Link: https://patch.msgid.link/20251106205621.3307639-1-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1e058b46d3e1..f56b18c8aebf 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1599,6 +1599,11 @@ static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } + if (p->flags & TCQ_F_INGRESS) { + NL_SET_ERR_MSG(extack, + "Cannot add children to ingress/clsact qdisc"); + return -EOPNOTSUPP; + } q = qdisc_leaf(p, clid, extack); if (IS_ERR(q)) return PTR_ERR(q); From 60260ad935861f6b8db7c65c23faa41c98d8fb15 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 6 Nov 2025 17:56:21 -0300 Subject: [PATCH 0940/1075] selftests/tc-testing: Create tests trying to add children to clsact/ingress qdiscs In response to Wang's bug report [1], add the following test cases: - Try and fail to add an fq child to an ingress qdisc - Try and fail to add an fq child to a clsact qdisc [1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Reviewed-by: Cong Wang Link: https://patch.msgid.link/20251106205621.3307639-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 998e5a2f4579..0091bcd91c2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -961,5 +961,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root" ] + }, + { + "id": "4989", + "name": "Try to add an fq child to an ingress qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 ingress" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress" + ] + }, + { + "id": "c2b0", + "name": "Try to add an fq child to a clsact qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 clsact" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] } ] From dd4adb986a86727ed8f56c48b6d0695f1e211e65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Oct 2025 12:27:24 -0400 Subject: [PATCH 0941/1075] selftests/tracing: Run sample events to clear page cache events The tracing selftest "event-filter-function.tc" was failing because it first runs the "sample_events" function that triggers the kmem_cache_free event and it looks at what function was used during a call to "ls". But the first time it calls this, it could trigger events that are used to pull pages into the page cache. The rest of the test uses the function it finds during that call to see if it will be called in subsequent "sample_events" calls. But if there's no need to pull pages into the page cache, it will not trigger that function and the test will fail. Call the "sample_events" twice to trigger all the page cache work before it calls it to find a function to use in subsequent checks. Cc: stable@vger.kernel.org Fixes: eb50d0f250e96 ("selftests/ftrace: Choose target function for filter test from samples") Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan --- .../selftests/ftrace/test.d/filter/event-filter-function.tc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index c62165fabd0c..cfa16aa1f39a 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -20,6 +20,10 @@ sample_events() { echo 0 > tracing_on echo 0 > events/enable +# Clear functions caused by page cache; run sample_events twice +sample_events +sample_events + echo "Get the most frequently calling function" echo > trace sample_events From 762e7e174da91cf4babfe77e45bc6b67334b1503 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 9 Nov 2025 14:46:35 +0100 Subject: [PATCH 0942/1075] net: dsa: tag_brcm: do not mark link local traffic as offloaded Broadcom switches locally terminate link local traffic and do not forward it, so we should not mark it as offloaded. In some situations we still want/need to flood this traffic, e.g. if STP is disabled, or it is explicitly enabled via the group_fwd_mask. But if the skb is marked as offloaded, the kernel will assume this was already done in hardware, and the packets never reach other bridge ports. So ensure that link local traffic is never marked as offloaded, so that the kernel can forward/flood these packets in software if needed. Since the local termination in not configurable, check the destination MAC, and never mark packets as offloaded if it is a link local ether address. While modern switches set the tag reason code to BRCM_EG_RC_PROT_TERM for trapped link local traffic, they also set it for link local traffic that is flooded (01:80:c2:00:00:10 to 01:80:c2:00:00:2f), so we cannot use it and need to look at the destination address for them as well. Fixes: 964dbf186eaa ("net: dsa: tag_brcm: add support for legacy tags") Fixes: 0e62f543bed0 ("net: dsa: Fix duplicate frames flooded by learning") Signed-off-by: Jonas Gorski Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251109134635.243951-1-jonas.gorski@gmail.com Signed-off-by: Jakub Kicinski --- net/dsa/tag_brcm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index d9c77fa553b5..eadb358179ce 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -176,7 +176,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); return skb; } @@ -250,7 +251,8 @@ static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, len); - dsa_default_offload_fwd_mark(skb); + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); dsa_strip_etype_header(skb, len); From 41d0c31be29fdee2535028ce70a6661e3a67bb25 Mon Sep 17 00:00:00 2001 From: Zahari Doychev Date: Thu, 6 Nov 2025 16:15:28 +0100 Subject: [PATCH 0943/1075] tools: ynl: call nested attribute free function for indexed arrays When freeing indexed arrays, the corresponding free function should be called for each entry of the indexed array. For example, for for 'struct tc_act_attrs' 'tc_act_attrs_free(...)' needs to be called for each entry. Previously, memory leaks were reported when enabling the ASAN analyzer. ================================================================= ==874==ERROR: LeakSanitizer: detected memory leaks Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db048af in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813 #2 0x55c98db048af in main ./linux/tools/net/ynl/samples/tc-filter-add.c:71 Direct leak of 24 byte(s) in 1 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db04a93 in tc_act_attrs_set_options_vlan_parms ../generated/tc-user.h:2813 #2 0x55c98db04a93 in main ./linux/tools/net/ynl/samples/tc-filter-add.c:74 Direct leak of 10 byte(s) in 2 object(s) allocated from: #0 0x7f221fd20cb5 in malloc ./debug/gcc/gcc/libsanitizer/asan/asan_malloc_linux.cpp:67 #1 0x55c98db0527d in tc_act_attrs_set_kind ../generated/tc-user.h:1622 SUMMARY: AddressSanitizer: 58 byte(s) leaked in 4 allocation(s). The following diff illustrates the changes introduced compared to the previous version of the code. void tc_flower_attrs_free(struct tc_flower_attrs *obj) { + unsigned int i; + free(obj->indev); + for (i = 0; i < obj->_count.act; i++) + tc_act_attrs_free(&obj->act[i]); free(obj->act); free(obj->key_eth_dst); free(obj->key_eth_dst_mask); Signed-off-by: Zahari Doychev Reviewed-by: Jakub Kicinski Link: https://patch.msgid.link/20251106151529.453026-3-zahari.doychev@linux.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ynl_gen_c.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 58086b101057..aadeb3abcad8 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -861,6 +861,18 @@ class TypeIndexedArray(Type): return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] + def free_needs_iter(self): + return self.sub_type == 'nest' + + def _free_lines(self, ri, var, ref): + lines = [] + if self.sub_type == 'nest': + lines += [ + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + ] + lines += f"free({var}->{ref}{self.c_name});", + return lines class TypeNestTypeValue(Type): def _complex_member_type(self, ri): From 2554559aba883803475e4ca4fae22eaad6d33d86 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 6 Nov 2025 19:02:52 +0100 Subject: [PATCH 0944/1075] bonding: fix mii_status when slave is down netif_carrier_ok() doesn't check if the slave is up. Before the below commit, netif_running() was also checked. Fixes: 23a6037ce76c ("bonding: Remove support for use_carrier") Signed-off-by: Nicolas Dichtel Acked-by: Jay Vosburgh Link: https://patch.msgid.link/20251106180252.3974772-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e95e593cd12d..5abef8a3b775 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2120,7 +2120,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, /* check for initial state */ new_slave->link = BOND_LINK_NOCHANGE; if (bond->params.miimon) { - if (netif_carrier_ok(slave_dev)) { + if (netif_running(slave_dev) && netif_carrier_ok(slave_dev)) { if (bond->params.updelay) { bond_set_slave_link_state(new_slave, BOND_LINK_BACK, @@ -2665,7 +2665,8 @@ static int bond_miimon_inspect(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { bond_propose_link_state(slave, BOND_LINK_NOCHANGE); - link_state = netif_carrier_ok(slave->dev); + link_state = netif_running(slave->dev) && + netif_carrier_ok(slave->dev); switch (slave->link) { case BOND_LINK_UP: From ec33f2e5a2d0dbbfd71435209aee812fdc9369b8 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 7 Nov 2025 10:40:29 +0800 Subject: [PATCH 0945/1075] net/smc: fix mismatch between CLC header and proposal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current CLC proposal message construction uses a mix of `ini->smc_type_v1/v2` and `pclc_base->hdr.typev1/v2` to decide whether to include optional extensions (IPv6 prefix extension for v1, and v2 extension). This leads to a critical inconsistency: when `smc_clc_prfx_set()` fails - for example, in IPv6-only environments with only link-local addresses, or when the local IP address and the outgoing interface’s network address are not in the same subnet. As a result, the proposal message is assembled using the stale `ini->smc_type_v1` value—causing the IPv6 prefix extension to be included even though the header indicates v1 is not supported. The peer then receives a malformed CLC proposal where the header type does not match the payload, and immediately resets the connection. The fix ensures consistency between the CLC header flags and the actual payload by synchronizing `ini->smc_type_v1` with `pclc_base->hdr.typev1` when prefix setup fails. Fixes: 8c3dca341aea ("net/smc: build and send V2 CLC proposal") Signed-off-by: D. Wythe Reviewed-by: Alexandra Winter Link: https://patch.msgid.link/20251107024029.88753-1-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_clc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 157aace169d4..87c87edadde7 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -890,6 +890,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) return SMC_CLC_DECL_CNFERR; } pclc_base->hdr.typev1 = SMC_TYPE_N; + ini->smc_type_v1 = SMC_TYPE_N; } else { pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); plen += sizeof(*pclc_prfx) + From 3072f00bba764082fa41b3c3a2a7b013335353d2 Mon Sep 17 00:00:00 2001 From: Zilin Guan Date: Thu, 6 Nov 2025 14:45:11 +0000 Subject: [PATCH 0946/1075] net/handshake: Fix memory leak in tls_handshake_accept() In tls_handshake_accept(), a netlink message is allocated using genlmsg_new(). In the error handling path, genlmsg_cancel() is called to cancel the message construction, but the message itself is not freed. This leads to a memory leak. Fix this by calling nlmsg_free() in the error path after genlmsg_cancel() to release the allocated memory. Fixes: 2fd5532044a89 ("net/handshake: Add a kernel API for requesting a TLSv1.3 handshake") Signed-off-by: Zilin Guan Reviewed-by: Chuck Lever Link: https://patch.msgid.link/20251106144511.3859535-1-zilin@seu.edu.cn Signed-off-by: Jakub Kicinski --- net/handshake/tlshd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c index 081093dfd553..8f9532a15f43 100644 --- a/net/handshake/tlshd.c +++ b/net/handshake/tlshd.c @@ -259,6 +259,7 @@ static int tls_handshake_accept(struct handshake_req *req, out_cancel: genlmsg_cancel(msg, hdr); + nlmsg_free(msg); out: return ret; } From 49b3916465176a5abcb29a0e464825f553d55d58 Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 6 Nov 2025 14:53:04 +0530 Subject: [PATCH 0947/1075] net: ethernet: ti: am65-cpsw-qos: fix IET verify/response timeout The CPSW module uses the MAC_VERIFY_CNT bit field in the CPSW_PN_IET_VERIFY_REG_k register to set the verify/response timeout count. This register specifies the number of clock cycles to wait before resending a verify packet if the verification fails. The verify/response timeout count, as being set by the function am65_cpsw_iet_set_verify_timeout_count() is hardcoded for 125MHz clock frequency, which varies based on PHY mode and link speed. The respective clock frequencies are as follows: - RGMII mode: * 1000 Mbps: 125 MHz * 100 Mbps: 25 MHz * 10 Mbps: 2.5 MHz - QSGMII/SGMII mode: 125 MHz (all speeds) Fix this by adding logic to calculate the correct timeout counts based on the actual PHY interface mode and link speed. Fixes: 49a2eb9068246 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support") Signed-off-by: Aksh Garg Link: https://patch.msgid.link/20251106092305.1437347-2-a-garg7@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index fa96db7c1a13..ad06942ce461 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -276,9 +276,31 @@ static int am65_cpsw_iet_set_verify_timeout_count(struct am65_cpsw_port *port) /* The number of wireside clocks contained in the verify * timeout counter. The default is 0x1312d0 * (10ms at 125Mhz in 1G mode). + * The frequency of the clock depends on the link speed + * and the PHY interface. */ - val = 125 * HZ_PER_MHZ; /* assuming 125MHz wireside clock */ + switch (port->slave.phy_if) { + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + if (port->qos.link_speed == SPEED_1000) + val = 125 * HZ_PER_MHZ; /* 125 MHz at 1000Mbps*/ + else if (port->qos.link_speed == SPEED_100) + val = 25 * HZ_PER_MHZ; /* 25 MHz at 100Mbps*/ + else + val = (25 * HZ_PER_MHZ) / 10; /* 2.5 MHz at 10Mbps*/ + break; + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_SGMII: + val = 125 * HZ_PER_MHZ; /* 125 MHz */ + break; + + default: + netdev_err(port->ndev, "selected mode does not supported IET\n"); + return -EOPNOTSUPP; + } val /= MILLIHZ_PER_HZ; /* count per ms timeout */ val *= verify_time_ms; /* count for timeout ms */ From d4b00d132d7cb70a74bc039c91c1d6120943c71b Mon Sep 17 00:00:00 2001 From: Aksh Garg Date: Thu, 6 Nov 2025 14:53:05 +0530 Subject: [PATCH 0948/1075] net: ethernet: ti: am65-cpsw-qos: fix IET verify retry mechanism The am65_cpsw_iet_verify_wait() function attempts verification 20 times, toggling the AM65_CPSW_PN_IET_MAC_LINKFAIL bit in each iteration. When the LINKFAIL bit transitions from 1 to 0, the MAC merge layer initiates the verification process and waits for the timeout configured in MAC_VERIFY_CNT before automatically retransmitting. The MAC_VERIFY_CNT register is configured according to the user-defined verify/response timeout in am65_cpsw_iet_set_verify_timeout_count(). As per IEEE 802.3 Clause 99, the hardware performs this automatic retry up to 3 times. Current implementation toggles LINKFAIL after the user-configured verify/response timeout in each iteration, forcing the hardware to restart verification instead of respecting the MAC_VERIFY_CNT timeout. This bypasses the hardware's automatic retry mechanism. Fix this by moving the LINKFAIL bit toggle outside the retry loop and reducing the retry count from 20 to 3. The software now only monitors the status register while the hardware autonomously handles the 3 verification attempts at proper MAC_VERIFY_CNT intervals. Fixes: 49a2eb9068246 ("net: ethernet: ti: am65-cpsw-qos: Add Frame Preemption MAC Merge support") Signed-off-by: Aksh Garg Link: https://patch.msgid.link/20251106092305.1437347-3-a-garg7@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/am65-cpsw-qos.c | 29 +++++++++++++------------ 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c index ad06942ce461..66e8b224827b 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-qos.c +++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c @@ -317,20 +317,21 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) u32 ctrl, status; int try; - try = 20; + try = 3; + + /* Reset the verify state machine by writing 1 + * to LINKFAIL + */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + + /* Clear MAC_LINKFAIL bit to start Verify. */ + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); + do { - /* Reset the verify state machine by writing 1 - * to LINKFAIL - */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - - /* Clear MAC_LINKFAIL bit to start Verify. */ - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); - msleep(port->qos.iet.verify_time_ms); status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); @@ -352,7 +353,7 @@ static int am65_cpsw_iet_verify_wait(struct am65_cpsw_port *port) netdev_dbg(port->ndev, "MAC Merge verify error\n"); return -ENODEV; } - } while (try-- > 0); + } while (--try > 0); netdev_dbg(port->ndev, "MAC Merge verify timeout\n"); return -ETIMEDOUT; From 0725e6afb55128be21a2ca36e9674f573ccec173 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Nov 2025 06:40:25 +0000 Subject: [PATCH 0949/1075] tipc: Fix use-after-free in tipc_mon_reinit_self(). syzbot reported use-after-free of tipc_net(net)->monitors[] in tipc_mon_reinit_self(). [0] The array is protected by RTNL, but tipc_mon_reinit_self() iterates over it without RTNL. tipc_mon_reinit_self() is called from tipc_net_finalize(), which is always under RTNL except for tipc_net_finalize_work(). Let's hold RTNL in tipc_net_finalize_work(). [0]: BUG: KASAN: slab-use-after-free in __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] BUG: KASAN: slab-use-after-free in _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162 Read of size 1 at addr ffff88805eae1030 by task kworker/0:7/5989 CPU: 0 UID: 0 PID: 5989 Comm: kworker/0:7 Not tainted syzkaller #0 PREEMPT_{RT,(full)} Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025 Workqueue: events tipc_net_finalize_work Call Trace: dump_stack_lvl+0x189/0x250 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:378 [inline] print_report+0xca/0x240 mm/kasan/report.c:482 kasan_report+0x118/0x150 mm/kasan/report.c:595 __kasan_check_byte+0x2a/0x40 mm/kasan/common.c:568 kasan_check_byte include/linux/kasan.h:399 [inline] lock_acquire+0x8d/0x360 kernel/locking/lockdep.c:5842 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline] _raw_spin_lock_irqsave+0xa7/0xf0 kernel/locking/spinlock.c:162 rtlock_slowlock kernel/locking/rtmutex.c:1894 [inline] rwbase_rtmutex_lock_state kernel/locking/spinlock_rt.c:160 [inline] rwbase_write_lock+0xd3/0x7e0 kernel/locking/rwbase_rt.c:244 rt_write_lock+0x76/0x110 kernel/locking/spinlock_rt.c:243 write_lock_bh include/linux/rwlock_rt.h:99 [inline] tipc_mon_reinit_self+0x79/0x430 net/tipc/monitor.c:718 tipc_net_finalize+0x115/0x190 net/tipc/net.c:140 process_one_work kernel/workqueue.c:3236 [inline] process_scheduled_works+0xade/0x17b0 kernel/workqueue.c:3319 worker_thread+0x8a0/0xda0 kernel/workqueue.c:3400 kthread+0x70e/0x8a0 kernel/kthread.c:463 ret_from_fork+0x439/0x7d0 arch/x86/kernel/process.c:148 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:245 Allocated by task 6089: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 poison_kmalloc_redzone mm/kasan/common.c:388 [inline] __kasan_kmalloc+0x93/0xb0 mm/kasan/common.c:405 kasan_kmalloc include/linux/kasan.h:260 [inline] __kmalloc_cache_noprof+0x1a8/0x320 mm/slub.c:4407 kmalloc_noprof include/linux/slab.h:905 [inline] kzalloc_noprof include/linux/slab.h:1039 [inline] tipc_mon_create+0xc3/0x4d0 net/tipc/monitor.c:657 tipc_enable_bearer net/tipc/bearer.c:357 [inline] __tipc_nl_bearer_enable+0xe16/0x13f0 net/tipc/bearer.c:1047 __tipc_nl_compat_doit net/tipc/netlink_compat.c:371 [inline] tipc_nl_compat_doit+0x3bc/0x5f0 net/tipc/netlink_compat.c:393 tipc_nl_compat_handle net/tipc/netlink_compat.c:-1 [inline] tipc_nl_compat_recv+0x83c/0xbe0 net/tipc/netlink_compat.c:1321 genl_family_rcv_msg_doit+0x215/0x300 net/netlink/genetlink.c:1115 genl_family_rcv_msg net/netlink/genetlink.c:1195 [inline] genl_rcv_msg+0x60e/0x790 net/netlink/genetlink.c:1210 netlink_rcv_skb+0x208/0x470 net/netlink/af_netlink.c:2552 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1219 netlink_unicast_kernel net/netlink/af_netlink.c:1320 [inline] netlink_unicast+0x846/0xa10 net/netlink/af_netlink.c:1346 netlink_sendmsg+0x805/0xb30 net/netlink/af_netlink.c:1896 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x21c/0x270 net/socket.c:729 ____sys_sendmsg+0x508/0x820 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x1a1/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 6088: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x3e/0x80 mm/kasan/common.c:68 kasan_save_free_info+0x46/0x50 mm/kasan/generic.c:576 poison_slab_object mm/kasan/common.c:243 [inline] __kasan_slab_free+0x5b/0x80 mm/kasan/common.c:275 kasan_slab_free include/linux/kasan.h:233 [inline] slab_free_hook mm/slub.c:2422 [inline] slab_free mm/slub.c:4695 [inline] kfree+0x195/0x550 mm/slub.c:4894 tipc_l2_device_event+0x380/0x650 net/tipc/bearer.c:-1 notifier_call_chain+0x1b3/0x3e0 kernel/notifier.c:85 call_netdevice_notifiers_extack net/core/dev.c:2267 [inline] call_netdevice_notifiers net/core/dev.c:2281 [inline] unregister_netdevice_many_notify+0x14d7/0x1fe0 net/core/dev.c:12166 unregister_netdevice_many net/core/dev.c:12229 [inline] unregister_netdevice_queue+0x33c/0x380 net/core/dev.c:12073 unregister_netdevice include/linux/netdevice.h:3385 [inline] __tun_detach+0xe4d/0x1620 drivers/net/tun.c:621 tun_detach drivers/net/tun.c:637 [inline] tun_chr_close+0x10d/0x1c0 drivers/net/tun.c:3433 __fput+0x458/0xa80 fs/file_table.c:468 task_work_run+0x1d4/0x260 kernel/task_work.c:227 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] exit_to_user_mode_loop+0xec/0x110 kernel/entry/common.c:43 exit_to_user_mode_prepare include/linux/irq-entry-common.h:225 [inline] syscall_exit_to_user_mode_work include/linux/entry-common.h:175 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:210 [inline] do_syscall_64+0x2bd/0x3b0 arch/x86/entry/syscall_64.c:100 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 46cb01eeeb86 ("tipc: update mon's self addr when node addr generated") Reported-by: syzbot+d7dad7fd4b3921104957@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/690c323a.050a0220.baf87.007f.GAE@google.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107064038.2361188-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- net/tipc/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/net.c b/net/tipc/net.c index 0e95572e56b4..7e65d0b0c4a8 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -145,7 +145,9 @@ void tipc_net_finalize_work(struct work_struct *work) { struct tipc_net *tn = container_of(work, struct tipc_net, work); + rtnl_lock(); tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); + rtnl_unlock(); } void tipc_net_stop(struct net *net) From e6ca8f533ed41129fcf052297718f417f021cc7d Mon Sep 17 00:00:00 2001 From: Buday Csaba Date: Sat, 8 Nov 2025 07:49:22 +0100 Subject: [PATCH 0950/1075] net: mdio: fix resource leak in mdiobus_register_device() Fix a possible leak in mdiobus_register_device() when both a reset-gpio and a reset-controller are present. Clean up the already claimed reset-gpio, when the registration of the reset-controller fails, so when an error code is returned, the device retains its state before the registration attempt. Link: https://lore.kernel.org/all/20251106144603.39053c81@kernel.org/ Fixes: 71dd6c0dff51 ("net: phy: add support for reset-controller") Signed-off-by: Buday Csaba Link: https://patch.msgid.link/4b419377f8dd7d2f63f919d0f74a336c734f8fff.1762584481.git.buday.csaba@prolan.hu Signed-off-by: Jakub Kicinski --- drivers/net/phy/mdio_bus.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index cad6ed3aa10b..4354241137d5 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -73,8 +73,11 @@ int mdiobus_register_device(struct mdio_device *mdiodev) return err; err = mdiobus_register_reset(mdiodev); - if (err) + if (err) { + gpiod_put(mdiodev->reset_gpio); + mdiodev->reset_gpio = NULL; return err; + } /* Assert the reset signal */ mdio_device_reset(mdiodev, 1); From 49c8d2c1f94cc2f4d1a108530d7ba52614b874c2 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:37 -0800 Subject: [PATCH 0951/1075] net: netpoll: fix incorrect refcount handling causing incorrect cleanup commit efa95b01da18 ("netpoll: fix use after free") incorrectly ignored the refcount and prematurely set dev->npinfo to NULL during netpoll cleanup, leading to improper behavior and memory leaks. Scenario causing lack of proper cleanup: 1) A netpoll is associated with a NIC (e.g., eth0) and netdev->npinfo is allocated, and refcnt = 1 - Keep in mind that npinfo is shared among all netpoll instances. In this case, there is just one. 2) Another netpoll is also associated with the same NIC and npinfo->refcnt += 1. - Now dev->npinfo->refcnt = 2; - There is just one npinfo associated to the netdev. 3) When the first netpolls goes to clean up: - The first cleanup succeeds and clears np->dev->npinfo, ignoring refcnt. - It basically calls `RCU_INIT_POINTER(np->dev->npinfo, NULL);` - Set dev->npinfo = NULL, without proper cleanup - No ->ndo_netpoll_cleanup() is either called 4) Now the second target tries to clean up - The second cleanup fails because np->dev->npinfo is already NULL. * In this case, ops->ndo_netpoll_cleanup() was never called, and the skb pool is not cleaned as well (for the second netpoll instance) - This leaks npinfo and skbpool skbs, which is clearly reported by kmemleak. Revert commit efa95b01da18 ("netpoll: fix use after free") and adds clarifying comments emphasizing that npinfo cleanup should only happen once the refcount reaches zero, ensuring stable and correct netpoll behavior. Cc: # 3.17.x Cc: Jay Vosburgh Fixes: efa95b01da18 ("netpoll: fix use after free") Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-1-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c85f740065fc..331764845e8f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -811,6 +811,10 @@ static void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; + /* At this point, there is a single npinfo instance per netdevice, and + * its refcnt tracks how many netpoll structures are linked to it. We + * only perform npinfo cleanup when the refcnt decrements to zero. + */ if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; @@ -820,8 +824,7 @@ static void __netpoll_cleanup(struct netpoll *np) RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); - } else - RCU_INIT_POINTER(np->dev->npinfo, NULL); + } skb_pool_flush(np); } From 39acc6a95eefcf814efa226d8813f89e7e03496e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:38 -0800 Subject: [PATCH 0952/1075] selftest: netcons: refactor target creation Extract the netconsole target creation from create_dynamic_target(), by moving it from create_dynamic_target() into a new helper function. This enables other tests to use the creation of netconsole targets with arbitrary parameters and no sleep. The new helper will be utilized by forthcoming torture-type selftests that require dynamic target management. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-2-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../drivers/net/lib/sh/lib_netcons.sh | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 8e1085e89647..9b5ef8074440 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -113,31 +113,39 @@ function set_network() { configure_ip } -function create_dynamic_target() { - local FORMAT=${1:-"extended"} +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') # Create a dynamic target - mkdir "${NETCONS_PATH}" + mkdir "${NCPATH}" - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name if [ "${FORMAT}" == "basic" ] then # Basic target does not support release - echo 0 > "${NETCONS_PATH}"/release - echo 0 > "${NETCONS_PATH}"/extended + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended elif [ "${FORMAT}" == "extended" ] then - echo 1 > "${NETCONS_PATH}"/extended + echo 1 > "${NCPATH}"/extended fi - echo 1 > "${NETCONS_PATH}"/enabled + echo 1 > "${NCPATH}"/enabled + +} + +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message From 6701896eb90998ff16338f199144bd9deefb79ba Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:39 -0800 Subject: [PATCH 0953/1075] selftest: netcons: create a torture test Create a netconsole test that puts a lot of pressure on the netconsole list manipulation. Do it by creating dynamic targets and deleting targets while messages are being sent. Also put interface down while the messages are being sent, as creating parallel targets. The code launches three background jobs on distinct schedules: * Toggle netcons target every 30 iterations * create and delete random_target every 50 iterations * toggle iface every 70 iterations This creates multiple concurrency sources that interact with netconsole states. This is good practice to simulate stress, and exercise netpoll and netconsole locks. This test already found an issue as reported in [1] Link: https://lore.kernel.org/all/20250901-netpoll_memleak-v1-1-34a181977dfc@debian.org/ [1] Signed-off-by: Breno Leitao Reviewed-by: Andre Carvalho Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-3-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/netcons_torture.sh | 130 ++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netcons_torture.sh diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a..71ee69e524d7 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -18,6 +18,7 @@ TEST_PROGS := \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netcons_torture.sh \ netpoll_basic.py \ ping.py \ psp.py \ diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh new file mode 100755 index 000000000000..2ce9ee3719d1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. +# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! -d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" From 236682db3b6fe71cad76ac5e920ea4c14a33178e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:40 -0800 Subject: [PATCH 0954/1075] selftest: netcons: add test for netconsole over bonded interfaces This patch adds a selftest that verifies netconsole functionality over bonded network interfaces using netdevsim. It sets up two bonded interfaces acting as transmit (TX) and receive (RX) ends, placed in separate network namespaces. The test sends kernel log messages and verifies that they are properly received on the bonded RX interfaces with both IPv4 and IPv6, and using basic and extended netconsole formats. This patchset aims to test a long-standing netpoll subsystem where netpoll has multiple users. (in this case netconsole and bonding). A similar selftest has been discussed in [1] and [2]. This test also tries to enable bonding and netpoll in different order, just to guarantee that all the possibilities are exercised. Link: https://lore.kernel.org/all/20250905-netconsole_torture-v3-0-875c7febd316@debian.org/ [1] Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.1404857349.git.decot@googlers.com/ [2] Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-4-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/bonding/Makefile | 2 + .../selftests/drivers/net/bonding/config | 4 + .../net/bonding/netcons_over_bonding.sh | 361 ++++++++++++++++++ .../drivers/net/lib/sh/lib_netcons.sh | 54 ++- 4 files changed, 414 insertions(+), 7 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 402d4ee84f2e..6c5c60adb5e8 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -14,6 +14,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ + netcons_over_bonding.sh \ # end of TEST_PROGS TEST_FILES := \ @@ -24,6 +25,7 @@ TEST_FILES := \ TEST_INCLUDES := \ ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ ../../../net/forwarding/lib.sh \ # end of TEST_INCLUDES diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 6bb290abd48b..991494376223 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,5 +1,6 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y CONFIG_DUMMY=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y @@ -9,6 +10,9 @@ CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 000000000000..477cc9379500 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). +# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD} "$NSIM_DEV_SYS_LINK" + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." >&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. +function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create two interfaces. veth interfaces it known to have + # IFF_DISABLE_NETPOLL set + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? + set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface could not be enslaved" + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create an bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. +# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 9b5ef8074440..87f89fd92f8c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later +SRCIP="" # to be populated later SRCIP4="192.0.2.1" SRCIP6="fc00::1" DSTIF="" # to be populated later +DSTIP="" # to be populated later DSTIP4="192.0.2.2" DSTIP6="fc00::2" @@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" -# IDs for netdevsim +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh # Create netdevsim interfaces create_ifaces() { - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" udevadm settle 2> /dev/null || true @@ -137,9 +145,6 @@ function _create_dynamic_target() { then echo 1 > "${NCPATH}"/extended fi - - echo 1 > "${NCPATH}"/enabled - } function create_dynamic_target() { @@ -147,6 +152,8 @@ function create_dynamic_target() { local NCPATH=${2:-"$NETCONS_PATH"} _create_dynamic_target "${FORMAT}" "${NCPATH}" + echo 1 > "${NCPATH}"/enabled + # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message # gets more organized/sequential as well. @@ -193,14 +200,26 @@ function do_cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } -function cleanup() { +function cleanup_netcons() { # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled + # do not fail if the target is already disabled + if [[ ! -d "${NETCONS_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + then + echo 0 > "${NETCONS_PATH}"/enabled || true + fi # Remove all the keys that got created during the selftest find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete # Remove the configfs entry rmdir "${NETCONS_PATH}" +} +function cleanup() { + cleanup_netcons do_cleanup } @@ -377,3 +396,24 @@ function wait_for_port() { # more frequently on IPv6 sleep 1 } + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} From 7fe0d21f5633af8c3fab9f0ef0706c6156623484 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Mon, 10 Nov 2025 14:26:18 +0200 Subject: [PATCH 0955/1075] wifi: mac80211: skip rate verification for not captured PSDUs If for example the sniffer did not follow any AIDs in an MU frame, then some of the information may not be filled in or is even expected to be invalid. As an example, in that case it is expected that Nss is zero. Fixes: 2ff5e52e7836 ("radiotap: add 0-length PSDU "not captured" type") Signed-off-by: Benjamin Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251110142554.83a2858ee15b.I9f78ce7984872f474722f9278691ae16378f0a3e@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6af43dfefdd6..5b4c3fe9970a 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -5360,10 +5360,14 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, if (WARN_ON(!local->started)) goto drop; - if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) { + if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC) && + !(status->flag & RX_FLAG_NO_PSDU && + status->zero_length_psdu_type == + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED))) { /* - * Validate the rate, unless a PLCP error means that - * we probably can't have a valid rate here anyway. + * Validate the rate, unless there was a PLCP error which may + * have an invalid rate or the PSDU was not capture and may be + * missing rate information. */ switch (status->encoding) { From eaa7ce66c3e2ccda035022b5e8af09caabecd635 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 10 Nov 2025 14:01:54 +0200 Subject: [PATCH 0956/1075] wifi: mac80211_hwsim: Fix possible NULL dereference The 'vif' pointer in the Tx information might be NULL, e.g., in case of injected frames etc. and is not checked in all paths. Fix it. While at it, also directly use the local 'vif' pointer. Fixes: a37a6f54439b ("wifi: mac80211_hwsim: Add simulation support for NAN device") Signed-off-by: Ilan Peer Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aNJUlyIiSTW9zZdr@stanley.mountain Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251110140128.ec00ae795a32.I9c65659b52434189d8b2ba06710d482669a3887a@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index d28bf18d57ec..5903d82e1ab1 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -2003,8 +2003,14 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, struct ieee80211_sta *sta = control->sta; struct ieee80211_bss_conf *bss_conf; + /* This can happen in case of monitor injection */ + if (!vif) { + ieee80211_free_txskb(hw, skb); + return; + } + if (link != IEEE80211_LINK_UNSPECIFIED) { - bss_conf = rcu_dereference(txi->control.vif->link_conf[link]); + bss_conf = rcu_dereference(vif->link_conf[link]); if (sta) link_sta = rcu_dereference(sta->link[link]); } else { @@ -2065,13 +2071,13 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, return; } - if (txi->control.vif) - hwsim_check_magic(txi->control.vif); + if (vif) + hwsim_check_magic(vif); if (control->sta) hwsim_check_sta_magic(control->sta); if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) - ieee80211_get_tx_rates(txi->control.vif, control->sta, skb, + ieee80211_get_tx_rates(vif, control->sta, skb, txi->control.rates, ARRAY_SIZE(txi->control.rates)); From c4e1ac09ee1c750890e36cb1f841f25518f23589 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Tue, 11 Nov 2025 11:07:29 +0100 Subject: [PATCH 0957/1075] wifi: mwl8k: inject DSSS Parameter Set element into beacons if missing Some Marvell AP firmware used with mwl8k misbehaves when beacon frames do not contain a WLAN_EID_DS_PARAMS element with the current channel. It was reported on OpenWrt Github issues [0]. When hostapd/mac80211 omits DSSS Parameter Set from the beacon (which is valid on some bands), the firmware stops transmitting sane frames and RX status starts reporting bogus channel information. This makes AP mode unusable. Newer Marvell drivers (mwlwifi [1]) hard-code DSSS Parameter Set into AP beacons for all chips, which suggests this is a firmware requirement rather than a mwl8k-specific quirk. Mirror that behaviour in mwl8k: when setting the beacon, check if WLAN_EID_DS_PARAMS is present, and if not, extend the beacon and inject a DSSS Parameter Set element, using the current channel from hw->conf.chandef.chan. Tested on Linksys EA4500 (88W8366). [0] https://github.com/openwrt/openwrt/issues/19088 [1] https://github.com/kaloz/mwlwifi/blob/db97edf20fadea2617805006f5230665fadc6a8c/hif/fwcmd.c#L675 Fixes: b64fe619e371 ("mwl8k: basic AP interface support") Tested-by: Antony Kolitsos Signed-off-by: Pawel Dembicki Link: https://patch.msgid.link/20251111100733.2825970-3-paweldembicki@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/marvell/mwl8k.c | 71 ++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 891e125ad30b..54d6d00ecdf1 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -2966,6 +2966,51 @@ mwl8k_cmd_rf_antenna(struct ieee80211_hw *hw, int antenna, int mask) /* * CMD_SET_BEACON. */ + +static bool mwl8k_beacon_has_ds_params(const u8 *buf, int len) +{ + const struct ieee80211_mgmt *mgmt = (const void *)buf; + int ies_len; + + if (len <= offsetof(struct ieee80211_mgmt, u.beacon.variable)) + return false; + + ies_len = len - offsetof(struct ieee80211_mgmt, u.beacon.variable); + + return cfg80211_find_ie(WLAN_EID_DS_PARAMS, mgmt->u.beacon.variable, + ies_len) != NULL; +} + +static void mwl8k_beacon_copy_inject_ds_params(struct ieee80211_hw *hw, + u8 *buf_dst, const u8 *buf_src, + int src_len) +{ + const struct ieee80211_mgmt *mgmt = (const void *)buf_src; + static const u8 before_ds_params[] = { + WLAN_EID_SSID, + WLAN_EID_SUPP_RATES, + }; + const u8 *ies; + int hdr_len, left, offs, pos; + + ies = mgmt->u.beacon.variable; + hdr_len = offsetof(struct ieee80211_mgmt, u.beacon.variable); + + offs = ieee80211_ie_split(ies, src_len - hdr_len, before_ds_params, + ARRAY_SIZE(before_ds_params), 0); + + pos = hdr_len + offs; + left = src_len - pos; + + memcpy(buf_dst, buf_src, pos); + + /* Inject a DSSS Parameter Set after SSID + Supp Rates */ + buf_dst[pos + 0] = WLAN_EID_DS_PARAMS; + buf_dst[pos + 1] = 1; + buf_dst[pos + 2] = hw->conf.chandef.chan->hw_value; + + memcpy(buf_dst + pos + 3, buf_src + pos, left); +} struct mwl8k_cmd_set_beacon { struct mwl8k_cmd_pkt_hdr header; __le16 beacon_len; @@ -2975,17 +3020,33 @@ struct mwl8k_cmd_set_beacon { static int mwl8k_cmd_set_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u8 *beacon, int len) { + bool ds_params_present = mwl8k_beacon_has_ds_params(beacon, len); struct mwl8k_cmd_set_beacon *cmd; - int rc; + int rc, final_len = len; - cmd = kzalloc(sizeof(*cmd) + len, GFP_KERNEL); + if (!ds_params_present) { + /* + * mwl8k firmware requires a DS Params IE with the current + * channel in AP beacons. If mac80211/hostapd does not + * include it, inject one here. IE ID + length + channel + * number = 3 bytes. + */ + final_len += 3; + } + + cmd = kzalloc(sizeof(*cmd) + final_len, GFP_KERNEL); if (cmd == NULL) return -ENOMEM; cmd->header.code = cpu_to_le16(MWL8K_CMD_SET_BEACON); - cmd->header.length = cpu_to_le16(sizeof(*cmd) + len); - cmd->beacon_len = cpu_to_le16(len); - memcpy(cmd->beacon, beacon, len); + cmd->header.length = cpu_to_le16(sizeof(*cmd) + final_len); + cmd->beacon_len = cpu_to_le16(final_len); + + if (ds_params_present) + memcpy(cmd->beacon, beacon, len); + else + mwl8k_beacon_copy_inject_ds_params(hw, cmd->beacon, beacon, + len); rc = mwl8k_post_pervif_cmd(hw, vif, &cmd->header); kfree(cmd); From 0f559cd91e37b7978e4198ca2fbf7eb95df11361 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 10 Nov 2025 17:30:10 +0000 Subject: [PATCH 0958/1075] KVM: arm64: Finalize ID registers only once per VM Owing to the ID registers being global to the VM, there is no point in computing them more than once. However, recent changes making use of kvm_set_vm_id_reg() outlined that we repeatedly hammer the ID registers when we shouldn't. Gate the ID reg update on the VM having never run. Fixes: 50e7cce81b9b2 ("KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip") Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest") Closes: https://lore.kernel.org/r/aRHf6x5umkTYhYJ3@finisterre.sirena.org.uk Reported-by: Mark Brown Tested-by: Mark Brown Link: https://patch.msgid.link/20251110173010.1918424-1-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8ae2bca81614..ec3fbe0b8d52 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -5609,7 +5609,11 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu) guard(mutex)(&kvm->arch.config_lock); - if (!irqchip_in_kernel(kvm)) { + /* + * This hacks into the ID registers, so only perform it when the + * first vcpu runs, or the kvm_set_vm_id_reg() helper will scream. + */ + if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) { u64 val; val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; From 60e6489f8e3b086bd1130ad4450a2c112e863791 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sun, 9 Nov 2025 02:52:22 +0000 Subject: [PATCH 0959/1075] af_unix: Initialise scc_index in unix_add_edge(). Quang Le reported that the AF_UNIX GC could garbage-collect a receive queue of an alive in-flight socket, with a nice repro. The repro consists of three stages. 1) 1-a. Create a single cyclic reference with many sockets 1-b. close() all sockets 1-c. Trigger GC 2) 2-a. Pass sk-A to an embryo sk-B 2-b. Pass sk-X to sk-X 2-c. Trigger GC 3) 3-a. accept() the embryo sk-B 3-b. Pass sk-B to sk-C 3-c. close() the in-flight sk-A 3-d. Trigger GC As of 2-c, sk-A and sk-X are linked to unix_unvisited_vertices, and unix_walk_scc() groups them into two different SCCs: unix_sk(sk-A)->vertex->scc_index = 2 (UNIX_VERTEX_INDEX_START) unix_sk(sk-X)->vertex->scc_index = 3 Once GC completes, unix_graph_grouped is set to true. Also, unix_graph_maybe_cyclic is set to true due to sk-X's cyclic self-reference, which makes close() trigger GC. At 3-b, unix_add_edge() allocates unix_sk(sk-B)->vertex and links it to unix_unvisited_vertices. unix_update_graph() is called at 3-a. and 3-b., but neither unix_graph_grouped nor unix_graph_maybe_cyclic is changed because both sk-B's listener and sk-C are not in-flight. 3-c decrements sk-A's file refcnt to 1. Since unix_graph_grouped is true at 3-d, unix_walk_scc_fast() is finally called and iterates 3 sockets sk-A, sk-B, and sk-X: sk-A -> sk-B (-> sk-C) sk-X -> sk-X This is totally fine. All of them are not yet close()d and should be grouped into different SCCs. However, unix_vertex_dead() misjudges that sk-A and sk-B are in the same SCC and sk-A is dead. unix_sk(sk-A)->scc_index == unix_sk(sk-B)->scc_index <-- Wrong! && sk-A's file refcnt == unix_sk(sk-A)->vertex->out_degree ^-- 1 in-flight count for sk-B -> sk-A is dead !? The problem is that unix_add_edge() does not initialise scc_index. Stage 1) is used for heap spraying, making a newly allocated vertex have vertex->scc_index == 2 (UNIX_VERTEX_INDEX_START) set by unix_walk_scc() at 1-c. Let's track the max SCC index from the previous unix_walk_scc() call and assign the max + 1 to a new vertex's scc_index. This way, we can continue to avoid Tarjan's algorithm while preventing misjudgments. Fixes: ad081928a8b0 ("af_unix: Avoid Tarjan's algorithm if unnecessary.") Reported-by: Quang Le Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20251109025233.3659187-1-kuniyu@google.com Signed-off-by: Paolo Abeni --- net/unix/garbage.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 684ab03137b6..65396a4e1b07 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -145,6 +145,7 @@ enum unix_vertex_index { }; static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; +static unsigned long unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) { @@ -153,6 +154,7 @@ static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) if (!vertex) { vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); vertex->index = unix_vertex_unvisited_index; + vertex->scc_index = ++unix_vertex_max_scc_index; vertex->out_degree = 0; INIT_LIST_HEAD(&vertex->edges); INIT_LIST_HEAD(&vertex->scc_entry); @@ -489,10 +491,15 @@ static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_inde scc_dead = unix_vertex_dead(v); } - if (scc_dead) + if (scc_dead) { unix_collect_skb(&scc, hitlist); - else if (!unix_graph_maybe_cyclic) - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } else { + if (unix_vertex_max_scc_index < vertex->scc_index) + unix_vertex_max_scc_index = vertex->scc_index; + + if (!unix_graph_maybe_cyclic) + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); + } list_del(&scc); } @@ -507,6 +514,7 @@ static void unix_walk_scc(struct sk_buff_head *hitlist) unsigned long last_index = UNIX_VERTEX_INDEX_START; unix_graph_maybe_cyclic = false; + unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; /* Visit every vertex exactly once. * __unix_walk_scc() moves visited vertices to unix_visited_vertices. From fd3ecda38fe0cb713d167b5477d25f6b350f0514 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Tue, 11 Nov 2025 16:08:01 +0800 Subject: [PATCH 0960/1075] EDAC/altera: Handle OCRAM ECC enable after warm reset The OCRAM ECC is always enabled either by the BootROM or by the Secure Device Manager (SDM) during a power-on reset on SoCFPGA. However, during a warm reset, the OCRAM content is retained to preserve data, while the control and status registers are reset to their default values. As a result, ECC must be explicitly re-enabled after a warm reset. Fixes: 17e47dc6db4f ("EDAC/altera: Add Stratix10 OCRAM ECC support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251111080801.1279401-1-niravkumarlaxmidas.rabara@altera.com --- drivers/edac/altera_edac.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 103b2c2eba2a..a776d61027f2 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1184,10 +1184,22 @@ altr_check_ocram_deps_init(struct altr_edac_device_dev *device) if (ret) return ret; - /* Verify OCRAM has been initialized */ + /* + * Verify that OCRAM has been initialized. + * During a warm reset, OCRAM contents are retained, but the control + * and status registers are reset to their default values. Therefore, + * ECC must be explicitly re-enabled in the control register. + * Error condition: if INITCOMPLETEA is clear and ECC_EN is already set. + */ if (!ecc_test_bits(ALTR_A10_ECC_INITCOMPLETEA, - (base + ALTR_A10_ECC_INITSTAT_OFST))) - return -ENODEV; + (base + ALTR_A10_ECC_INITSTAT_OFST))) { + if (!ecc_test_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST))) + ecc_set_bits(ALTR_A10_ECC_EN, + (base + ALTR_A10_ECC_CTRL_OFST)); + else + return -ENODEV; + } /* Enable IRQ on Single Bit Error */ writel(ALTR_A10_ECC_SERRINTEN, (base + ALTR_A10_ECC_ERRINTENS_OFST)); From 485e0626e58768f3c53ba61ab9e09d6b60a455f4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Thu, 6 Nov 2025 13:05:35 -0500 Subject: [PATCH 0961/1075] Bluetooth: hci_event: Fix not handling PA Sync Lost event This handles PA Sync Lost event which previously was assumed to be handled with BIG Sync Lost but their lifetime are not the same thus why there are 2 different events to inform when each sync is lost. Fixes: b2a5f2e1c127 ("Bluetooth: hci_event: Add support for handling LE BIG Sync Lost event") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 5 ++++ net/bluetooth/hci_event.c | 49 ++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 8d0e703bc929..cb4c02d00759 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -2783,6 +2783,11 @@ struct hci_ev_le_per_adv_report { __u8 data[]; } __packed; +#define HCI_EV_LE_PA_SYNC_LOST 0x10 +struct hci_ev_le_pa_sync_lost { + __le16 handle; +} __packed; + #define LE_PA_DATA_COMPLETE 0x00 #define LE_PA_DATA_MORE_TO_COME 0x01 #define LE_PA_DATA_TRUNCATED 0x02 diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 03328c1dd090..3838b90343d9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5843,6 +5843,29 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev, void *data, le16_to_cpu(ev->supervision_timeout)); } +static void hci_le_pa_sync_lost_evt(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_ev_le_pa_sync_lost *ev = data; + u16 handle = le16_to_cpu(ev->handle); + struct hci_conn *conn; + + bt_dev_dbg(hdev, "sync handle 0x%4.4x", handle); + + hci_dev_lock(hdev); + + /* Delete the pa sync connection */ + conn = hci_conn_hash_lookup_pa_sync_handle(hdev, handle); + if (conn) { + clear_bit(HCI_CONN_BIG_SYNC, &conn->flags); + clear_bit(HCI_CONN_PA_SYNC, &conn->flags); + hci_disconn_cfm(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_conn_del(conn); + } + + hci_dev_unlock(hdev); +} + static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -7046,29 +7069,24 @@ static void hci_le_big_sync_lost_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_evt_le_big_sync_lost *ev = data; - struct hci_conn *bis, *conn; - bool mgmt_conn; + struct hci_conn *bis; + bool mgmt_conn = false; bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); hci_dev_lock(hdev); - /* Delete the pa sync connection */ - bis = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); - if (bis) { - conn = hci_conn_hash_lookup_pa_sync_handle(hdev, - bis->sync_handle); - if (conn) - hci_conn_del(conn); - } - /* Delete each bis connection */ while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, BT_CONNECTED, HCI_ROLE_SLAVE))) { - mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); - mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, - ev->reason, mgmt_conn); + if (!mgmt_conn) { + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, + &bis->flags); + mgmt_device_disconnected(hdev, &bis->dst, bis->type, + bis->dst_type, ev->reason, + mgmt_conn); + } clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); hci_disconn_cfm(bis, ev->reason); @@ -7182,6 +7200,9 @@ static const struct hci_le_ev { hci_le_per_adv_report_evt, sizeof(struct hci_ev_le_per_adv_report), HCI_MAX_EVENT_SIZE), + /* [0x10 = HCI_EV_LE_PA_SYNC_LOST] */ + HCI_LE_EV(HCI_EV_LE_PA_SYNC_LOST, hci_le_pa_sync_lost_evt, + sizeof(struct hci_ev_le_pa_sync_lost)), /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, sizeof(struct hci_evt_le_ext_adv_set_term)), From 281326be67252ac5794d1383f67526606b1d6b13 Mon Sep 17 00:00:00 2001 From: Niravkumar L Rabara Date: Tue, 11 Nov 2025 16:13:33 +0800 Subject: [PATCH 0962/1075] EDAC/altera: Use INTTEST register for Ethernet and USB SBE injection The current single-bit error injection mechanism flips bits directly in ECC RAM by performing write and read operations. When the ECC RAM is actively used by the Ethernet or USB controller, this approach sometimes trigger a false double-bit error. Switch both Ethernet and USB EDAC devices to use the INTTEST register (altr_edac_a10_device_inject_fops) for single-bit error injection, similar to the existing double-bit error injection method. Fixes: 064acbd4f4ab ("EDAC, altera: Add Stratix10 peripheral support") Signed-off-by: Niravkumar L Rabara Signed-off-by: Borislav Petkov (AMD) Acked-by: Dinh Nguyen Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20251111081333.1279635-1-niravkumarlaxmidas.rabara@altera.com --- drivers/edac/altera_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index a776d61027f2..0c5b94e64ea1 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -1369,7 +1369,7 @@ static const struct edac_device_prv_data a10_enetecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ @@ -1459,7 +1459,7 @@ static const struct edac_device_prv_data a10_usbecc_data = { .ue_set_mask = ALTR_A10_ECC_TDERRA, .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, .ecc_irq_handler = altr_edac_a10_ecc_irq, - .inject_fops = &altr_edac_a10_device_inject2_fops, + .inject_fops = &altr_edac_a10_device_inject_fops, }; #endif /* CONFIG_EDAC_ALTERA_USB */ From 62b656e43eaeae445a39cd8021a4f47065af4389 Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 9 Nov 2025 14:43:35 +0530 Subject: [PATCH 0963/1075] net: sched: act_connmark: initialize struct tc_ife to fix kernel leak In tcf_connmark_dump(), the variable 'opt' was partially initialized using a designatied initializer. While the padding bytes are reamined uninitialized. nla_put() copies the entire structure into a netlink message, these uninitialized bytes leaked to userspace. Initialize the structure with memset before assigning its fields to ensure all members and padding are cleared prior to beign copied. Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Fixes: 22a5dc0e5e3e ("net: sched: Introduce connmark action") Signed-off-by: Ranganath V N Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251109091336.9277-2-vnranganath.20@gmail.com Acked-by: Cong Wang Signed-off-by: Paolo Abeni --- net/sched/act_connmark.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 3e89927d7116..26ba8c2d20ab 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -195,13 +195,15 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, const struct tcf_connmark_info *ci = to_connmark(a); unsigned char *b = skb_tail_pointer(skb); const struct tcf_connmark_parms *parms; - struct tc_connmark opt = { - .index = ci->tcf_index, - .refcnt = refcount_read(&ci->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, - }; + struct tc_connmark opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ci->tcf_index; + opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; + opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; + rcu_read_lock(); parms = rcu_dereference(ci->parms); From ce50039be49eea9b4cd8873ca6eccded1b4a130a Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 9 Nov 2025 14:43:36 +0530 Subject: [PATCH 0964/1075] net: sched: act_ife: initialize struct tc_ife to fix KMSAN kernel-infoleak Fix a KMSAN kernel-infoleak detected by the syzbot . [net?] KMSAN: kernel-infoleak in __skb_datagram_iter In tcf_ife_dump(), the variable 'opt' was partially initialized using a designatied initializer. While the padding bytes are reamined uninitialized. nla_put() copies the entire structure into a netlink message, these uninitialized bytes leaked to userspace. Initialize the structure with memset before assigning its fields to ensure all members and padding are cleared prior to beign copied. This change silences the KMSAN report and prevents potential information leaks from the kernel memory. This fix has been tested and validated by syzbot. This patch closes the bug reported at the following syzkaller link and ensures no infoleak. Reported-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=0c85cae3350b7d486aee Tested-by: syzbot+0c85cae3350b7d486aee@syzkaller.appspotmail.com Fixes: ef6980b6becb ("introduce IFE action") Signed-off-by: Ranganath V N Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251109091336.9277-3-vnranganath.20@gmail.com Acked-by: Cong Wang Signed-off-by: Paolo Abeni --- net/sched/act_ife.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 107c6d83dc5c..7c6975632fc2 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -644,13 +644,15 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; - struct tc_ife opt = { - .index = ife->tcf_index, - .refcnt = refcount_read(&ife->tcf_refcnt) - ref, - .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, - }; + struct tc_ife opt; struct tcf_t t; + memset(&opt, 0, sizeof(opt)); + + opt.index = ife->tcf_index, + opt.refcnt = refcount_read(&ife->tcf_refcnt) - ref, + opt.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, + spin_lock_bh(&ife->tcf_lock); opt.action = ife->tcf_action; p = rcu_dereference_protected(ife->params, From 0bcd5b3b50cc1fcbf775479322cc37c15d35a489 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Sun, 9 Nov 2025 11:37:49 +0200 Subject: [PATCH 0965/1075] net/mlx5e: Fix missing error assignment in mlx5e_xfrm_add_state() Assign the return value of mlx5_eswitch_block_mode() to 'err' before checking it to avoid returning an uninitialized error code. Fixes: 22239eb258bc ("net/mlx5e: Prevent tunnel reformat when tunnel mode not allowed") Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202510271649.uwsIxD6O-lkp@intel.com/ Closes: http://lore.kernel.org/linux-rdma/aPIEK4rLB586FdDt@stanley.mountain/ Signed-off-by: Carolina Jubran Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-2-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 0a4fb8c92268..35d9530037a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -804,7 +804,8 @@ static int mlx5e_xfrm_add_state(struct net_device *dev, goto err_xfrm; } - if (mlx5_eswitch_block_mode(priv->mdev)) + err = mlx5_eswitch_block_mode(priv->mdev); + if (err) goto unblock_ipsec; if (x->props.mode == XFRM_MODE_TUNNEL && From 2dc768c05217e667f987907a3404926e7ba89ff3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Sun, 9 Nov 2025 11:37:50 +0200 Subject: [PATCH 0966/1075] net/mlx5e: Trim the length of the num_doorbell error When trying to set num_doorbells to a value greater than the max number of channels, the error message was going over the netlink limit of 80 chars, truncating the most important part of the message, the number of channels. Fix that by trimming the length a bit. Fixes: 11bbcfb7668c ("net/mlx5e: Use the 'num_doorbells' devlink param") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-3-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index fceea83abbd7..887adf4807d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -541,7 +541,7 @@ static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id, max_num_channels = mlx5e_get_max_num_channels(mdev); if (val32 > max_num_channels) { NL_SET_ERR_MSG_FMT_MOD(extack, - "Requested num_doorbells (%u) exceeds maximum number of channels (%u)", + "Requested num_doorbells (%u) exceeds max number of channels (%u)", val32, max_num_channels); return -EINVAL; } From a7bf4d5063c7837096aab2853224eb23628514d9 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:51 +0200 Subject: [PATCH 0967/1075] net/mlx5e: Fix maxrate wraparound in threshold between units The previous calculation used roundup() which caused an overflow for rates between 25.5Gbps and 26Gbps. For example, a rate of 25.6Gbps would result in using 100Mbps units with value of 256, which would overflow the 8 bits field. Simplify the upper_limit_mbps calculation by removing the unnecessary roundup, and adjust the comparison to use <= to correctly handle the boundary condition. Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-4-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d166c0d5189e..345614471052 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -595,18 +595,19 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, struct mlx5_core_dev *mdev = priv->mdev; u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; - __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); + __u64 upper_limit_mbps; int i; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); + upper_limit_mbps = 255 * MLX5E_100MB; for (i = 0; i <= mlx5_max_tc(mdev); i++) { if (!maxrate->tc_maxrate[i]) { max_bw_unit[i] = MLX5_BW_NO_LIMIT; continue; } - if (maxrate->tc_maxrate[i] < upper_limit_mbps) { + if (maxrate->tc_maxrate[i] <= upper_limit_mbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_100MB); max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; From 43b27d1bd88a4bce34ec2437d103acfae9655f9e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:52 +0200 Subject: [PATCH 0968/1075] net/mlx5e: Fix wraparound in rate limiting for values above 255 Gbps Add validation to reject rates exceeding 255 Gbps that would overflow the 8 bits max bandwidth field. Fixes: d8880795dabf ("net/mlx5e: Implement DCBNL IEEE max rate") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-5-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 345614471052..d88a48210fdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -596,11 +596,13 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; __u64 upper_limit_mbps; + __u64 upper_limit_gbps; int i; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); upper_limit_mbps = 255 * MLX5E_100MB; + upper_limit_gbps = 255 * MLX5E_1GB; for (i = 0; i <= mlx5_max_tc(mdev); i++) { if (!maxrate->tc_maxrate[i]) { @@ -612,10 +614,16 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, MLX5E_100MB); max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1; max_bw_unit[i] = MLX5_100_MBPS_UNIT; - } else { + } else if (max_bw_value[i] <= upper_limit_gbps) { max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], MLX5E_1GB); max_bw_unit[i] = MLX5_GBPS_UNIT; + } else { + netdev_err(netdev, + "tc_%d maxrate %llu Kbps exceeds limit %llu\n", + i, maxrate->tc_maxrate[i], + upper_limit_gbps); + return -EINVAL; } } From 9fcc2b6c10523f7e75db6387946c86fcf19dc97e Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 9 Nov 2025 11:37:53 +0200 Subject: [PATCH 0969/1075] net/mlx5e: Fix potentially misleading debug message Change the debug message to print the correct units instead of always assuming Gbps, as the value can be in either 100 Mbps or 1 Gbps units. Fixes: 5da8bc3effb6 ("net/mlx5e: DCBNL, Add debug messages log") Signed-off-by: Gal Pressman Reviewed-by: Nimrod Oren Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1762681073-1084058-6-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- .../net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d88a48210fdc..9b93da4d52f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -598,6 +598,19 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, __u64 upper_limit_mbps; __u64 upper_limit_gbps; int i; + struct { + int scale; + const char *units_str; + } units[] = { + [MLX5_100_MBPS_UNIT] = { + .scale = 100, + .units_str = "Mbps", + }, + [MLX5_GBPS_UNIT] = { + .scale = 1, + .units_str = "Gbps", + }, + }; memset(max_bw_value, 0, sizeof(max_bw_value)); memset(max_bw_unit, 0, sizeof(max_bw_unit)); @@ -628,8 +641,9 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev, } for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { - netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", - __func__, i, max_bw_value[i]); + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %u %s\n", __func__, i, + max_bw_value[i] * units[max_bw_unit[i]].scale, + units[max_bw_unit[i]].units_str); } return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit); From cd8dbd9ef600435439bb0e70af0a1d9e2193aecb Mon Sep 17 00:00:00 2001 From: Max Chou Date: Wed, 5 Nov 2025 20:02:04 +0800 Subject: [PATCH 0970/1075] Bluetooth: btrtl: Avoid loading the config file on security chips For chips with security enabled, it's only possible to load firmware with a valid signature pattern. If key_id is not zero, it indicates a security chip, and the driver will not load the config file. - Example log for a security chip. Bluetooth: hci0: RTL: examining hci_ver=0c hci_rev=000a lmp_ver=0c lmp_subver=8922 Bluetooth: hci0: RTL: rom_version status=0 version=1 Bluetooth: hci0: RTL: btrtl_initialize: key id 1 Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_fw.bin Bluetooth: hci0: RTL: cfg_sz 0, total sz 71301 Bluetooth: hci0: RTL: fw version 0x41c0c905 - Example log for a normal chip. Bluetooth: hci0: RTL: examining hci_ver=0c hci_rev=000a lmp_ver=0c lmp_subver=8922 Bluetooth: hci0: RTL: rom_version status=0 version=1 Bluetooth: hci0: RTL: btrtl_initialize: key id 0 Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_fw.bin Bluetooth: hci0: RTL: loading rtl_bt/rtl8922au_config.bin Bluetooth: hci0: RTL: cfg_sz 6, total sz 71307 Bluetooth: hci0: RTL: fw version 0x41c0c905 Tested-by: Hilda Wu Signed-off-by: Nial Ni Signed-off-by: Max Chou Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btrtl.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 1d4a7887abcc..52794db2739b 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -50,7 +50,7 @@ #define RTL_CHIP_SUBVER (&(struct rtl_vendor_cmd) {{0x10, 0x38, 0x04, 0x28, 0x80}}) #define RTL_CHIP_REV (&(struct rtl_vendor_cmd) {{0x10, 0x3A, 0x04, 0x28, 0x80}}) -#define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0x0D, 0x00, 0xb0}}) +#define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0xAD, 0x00, 0xb0}}) #define RTL_PATCH_SNIPPETS 0x01 #define RTL_PATCH_DUMMY_HEADER 0x02 @@ -534,7 +534,6 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, { struct rtl_epatch_header_v2 *hdr; int rc; - u8 reg_val[2]; u8 key_id; u32 num_sections; struct rtl_section *section; @@ -549,14 +548,7 @@ static int rtlbt_parse_firmware_v2(struct hci_dev *hdev, .len = btrtl_dev->fw_len - 7, /* Cut the tail */ }; - rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); - if (rc < 0) - return -EIO; - key_id = reg_val[0]; - - rtl_dev_dbg(hdev, "%s: key id %u", __func__, key_id); - - btrtl_dev->key_id = key_id; + key_id = btrtl_dev->key_id; hdr = rtl_iov_pull_data(&iov, sizeof(*hdr)); if (!hdr) @@ -1070,6 +1062,8 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, u16 hci_rev, lmp_subver; u8 hci_ver, lmp_ver, chip_type = 0; int ret; + int rc; + u8 key_id; u8 reg_val[2]; btrtl_dev = kzalloc(sizeof(*btrtl_dev), GFP_KERNEL); @@ -1180,6 +1174,14 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, goto err_free; } + rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); + if (rc < 0) + goto err_free; + + key_id = reg_val[0]; + btrtl_dev->key_id = key_id; + rtl_dev_info(hdev, "%s: key id %u", __func__, key_id); + btrtl_dev->fw_len = -EIO; if (lmp_subver == RTL_ROM_LMP_8852A && hci_rev == 0x000c) { snprintf(fw_name, sizeof(fw_name), "%s_v2.bin", @@ -1202,7 +1204,7 @@ struct btrtl_device_info *btrtl_initialize(struct hci_dev *hdev, goto err_free; } - if (btrtl_dev->ic_info->cfg_name) { + if (btrtl_dev->ic_info->cfg_name && !btrtl_dev->key_id) { if (postfix) { snprintf(cfg_name, sizeof(cfg_name), "%s-%s.bin", btrtl_dev->ic_info->cfg_name, postfix); From e5eba42f01340f73888dfe560be2806057c25913 Mon Sep 17 00:00:00 2001 From: Akiva Goldberger Date: Sun, 9 Nov 2025 11:49:03 +0200 Subject: [PATCH 0971/1075] mlx5: Fix default values in create CQ Currently, CQs without a completion function are assigned the mlx5_add_cq_to_tasklet function by default. This is problematic since only user CQs created through the mlx5_ib driver are intended to use this function. Additionally, all CQs that will use doorbells instead of polling for completions must call mlx5_cq_arm. However, the default CQ creation flow leaves a valid value in the CQ's arm_db field, allowing FW to send interrupts to polling-only CQs in certain corner cases. These two factors would allow a polling-only kernel CQ to be triggered by an EQ interrupt and call a completion function intended only for user CQs, causing a null pointer exception. Some areas in the driver have prevented this issue with one-off fixes but did not address the root cause. This patch fixes the described issue by adding defaults to the create CQ flow. It adds a default dummy completion function to protect against null pointer exceptions, and it sets an invalid command sequence number by default in kernel CQs to prevent the FW from sending an interrupt to the CQ until it is armed. User CQs are responsible for their own initialization values. Callers of mlx5_core_create_cq are responsible for changing the completion function and arming the CQ per their needs. Fixes: cdd04f4d4d71 ("net/mlx5: Add support to create SQ and CQ for ASO") Signed-off-by: Akiva Goldberger Reviewed-by: Moshe Shemesh Signed-off-by: Tariq Toukan Acked-by: Leon Romanovsky Link: https://patch.msgid.link/1762681743-1084694-1-git-send-email-tariqt@nvidia.com Signed-off-by: Paolo Abeni --- drivers/infiniband/hw/mlx5/cq.c | 11 +++++--- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 23 +++++++++++++-- .../net/ethernet/mellanox/mlx5/core/en_main.c | 1 - .../ethernet/mellanox/mlx5/core/fpga/conn.c | 15 +++++----- .../mellanox/mlx5/core/steering/hws/send.c | 7 ----- .../mellanox/mlx5/core/steering/sws/dr_send.c | 28 +++++-------------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 6 ++-- include/linux/mlx5/cq.h | 1 + 8 files changed, 44 insertions(+), 48 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index a23b364e24ff..651d76bca114 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1020,15 +1020,18 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); + if (udata) { + cq->mcq.comp = mlx5_add_cq_to_tasklet; + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; + } else { + cq->mcq.comp = mlx5_ib_cq_comp; + } + err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); if (err) goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - if (udata) - cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; - else - cq->mcq.comp = mlx5_ib_cq_comp; cq->mcq.event = mlx5_ib_cq_event; INIT_LIST_HEAD(&cq->wc_list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index e9f319a9bdd6..60f7ab1d72e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -66,8 +66,8 @@ void mlx5_cq_tasklet_cb(struct tasklet_struct *t) tasklet_schedule(&ctx->task); } -static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, - struct mlx5_eqe *eqe) +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -95,7 +95,15 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, if (schedule_tasklet) tasklet_schedule(&tasklet_ctx->task); } +EXPORT_SYMBOL(mlx5_add_cq_to_tasklet); +static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) +{ + mlx5_core_err(cq->eq->core.dev, + "CQ default completion callback, CQ #%u\n", cq->cqn); +} + +#define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28) /* Callers must verify outbox status in case of err */ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) @@ -121,10 +129,19 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->arm_sn = 0; cq->eq = eq; cq->uid = MLX5_GET(create_cq_in, in, uid); + + /* Kernel CQs must set the arm_db address prior to calling + * this function, allowing for the proper value to be + * initialized. User CQs are responsible for their own + * initialization since they do not use the arm_db field. + */ + if (cq->arm_db) + *cq->arm_db = MLX5_CQ_INIT_CMD_SN; + refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) - cq->comp = mlx5_add_cq_to_tasklet; + cq->comp = mlx5_core_cq_dummy_cb; /* assuming CQ will be deleted before the EQ */ cq->tasklet_ctx.priv = &eq->tasklet_ctx; INIT_LIST_HEAD(&cq->tasklet_ctx.list); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6023bbbf3f39..5e17eae81f4b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2219,7 +2219,6 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; - *mcq->arm_db = 0; mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index cb1319974f83..ccef64fb40b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -421,6 +421,13 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) __be64 *pas; u32 i; + conn->cq.mcq.cqe_sz = 64; + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; + *conn->cq.mcq.set_ci_db = 0; + conn->cq.mcq.vector = 0; + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; + cq_size = roundup_pow_of_two(cq_size); MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); @@ -468,15 +475,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) if (err) goto err_cqwq; - conn->cq.mcq.cqe_sz = 64; - conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; - conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; - *conn->cq.mcq.set_ci_db = 0; - *conn->cq.mcq.arm_db = 0; - conn->cq.mcq.vector = 0; - conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); - mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 24ef7d66fa8a..7510c46e58a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -873,12 +873,6 @@ static int hws_send_ring_open_sq(struct mlx5hws_context *ctx, return err; } -static void hws_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, int numa_node, struct mlx5hws_send_engine *queue, @@ -901,7 +895,6 @@ static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; - mcq->comp = hws_cq_complete; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { cqe = mlx5_cqwq_get_wqe(&cq->wq, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index 077a77fde670..d034372fa047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1049,12 +1049,6 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn) return 0; } -static void dr_cq_complete(struct mlx5_core_cq *mcq, - struct mlx5_eqe *eqe) -{ - pr_err("CQ completion CQ: #%u\n", mcq->cqn); -} - static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_uars_page *uar, size_t ncqe) @@ -1089,6 +1083,13 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; } + cq->mcq.cqe_sz = 64; + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; + *cq->mcq.set_ci_db = 0; + cq->mcq.vector = 0; + cq->mdev = mdev; + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = kvzalloc(inlen, GFP_KERNEL); @@ -1112,27 +1113,12 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); - cq->mcq.comp = dr_cq_complete; - err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) goto err_cqwq; - cq->mcq.cqe_sz = 64; - cq->mcq.set_ci_db = cq->wq_ctrl.db.db; - cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; - *cq->mcq.set_ci_db = 0; - - /* set no-zero value, in order to avoid the HW to run db-recovery on - * CQ that used in polling mode. - */ - *cq->mcq.arm_db = cpu_to_be32(2 << 28); - - cq->mcq.vector = 0; - cq->mdev = mdev; - return cq; err_cqwq: diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 82034efb74fc..a7936bd1aabe 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -573,6 +573,8 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) vcq->mcq.set_ci_db = vcq->db.db; vcq->mcq.arm_db = vcq->db.db + 1; vcq->mcq.cqe_sz = 64; + vcq->mcq.comp = mlx5_vdpa_cq_comp; + vcq->cqe = num_ent; err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); if (err) @@ -612,10 +614,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) if (err) goto err_vec; - vcq->mcq.comp = mlx5_vdpa_cq_comp; - vcq->cqe = num_ent; - vcq->mcq.set_ci_db = vcq->db.db; - vcq->mcq.arm_db = vcq->db.db + 1; mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); kfree(in); return 0; diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 7ef2c7c7d803..9d47cdc727ad 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -183,6 +183,7 @@ static inline void mlx5_cq_put(struct mlx5_core_cq *cq) complete(&cq->free); } +void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe); int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, From 55286b1e1bf4ce55f61ad2816d4ff8a7861a8cbb Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 10 Nov 2025 16:24:20 +0100 Subject: [PATCH 0972/1075] smb: server: let smb_direct_disconnect_rdma_connection() turn CREATED into DISCONNECTED When smb_direct_disconnect_rdma_connection() turns SMBDIRECT_SOCKET_CREATED into SMBDIRECT_SOCKET_ERROR, we'll have the situation that smb_direct_disconnect_rdma_work() will set SMBDIRECT_SOCKET_DISCONNECTING and call rdma_disconnect(), which likely fails as we never reached the RDMA_CM_EVENT_ESTABLISHED. it means that wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED) in free_transport() will hang forever in SMBDIRECT_SOCKET_DISCONNECTING never reaching SMBDIRECT_SOCKET_DISCONNECTED. So we directly go from SMBDIRECT_SOCKET_CREATED to SMBDIRECT_SOCKET_DISCONNECTED. Fixes: b3fd52a0d85c ("smb: server: let smb_direct_disconnect_rdma_connection() set SMBDIRECT_SOCKET_ERROR...") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 3d8d8cb456c1..e2be9a496154 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -334,6 +334,9 @@ smb_direct_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + case SMBDIRECT_SOCKET_CONNECTED: sc->status = SMBDIRECT_SOCKET_ERROR; break; From ed6612165b74f09db00ef0abaf9831895ab28b7f Mon Sep 17 00:00:00 2001 From: Yiqi Sun Date: Tue, 11 Nov 2025 15:05:39 +0800 Subject: [PATCH 0973/1075] smb: fix invalid username check in smb3_fs_context_parse_param() Since the maximum return value of strnlen(..., CIFS_MAX_USERNAME_LEN) is CIFS_MAX_USERNAME_LEN, length check in smb3_fs_context_parse_param() is always FALSE and invalid. Fix the comparison in if statement. Signed-off-by: Yiqi Sun Signed-off-by: Steve French --- fs/smb/client/fs_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index c2d5bb23040c..0f894d09157b 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -1470,7 +1470,7 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, break; } - if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) > + if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) == CIFS_MAX_USERNAME_LEN) { pr_warn("username too long\n"); goto cifs_parse_mount_err; From c42af83c59b65d01c0f7a074e450bbbb43b22f0d Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 11 Nov 2025 10:00:10 +0900 Subject: [PATCH 0974/1075] memblock: fix memblock_estimated_nr_free_pages() for soft-reserved memory memblock_estimated_nr_free_pages() returns the difference between the total size of the "memory" memblock type and the "reserved" memblock type. The "soft-reserved" memory regions are added to the "reserved" memblock type, but not to the "memory" memblock type. Therefore, memblock_estimated_nr_free_pages() may return a smaller value than expected, or if it underflows, an extremely large value. /proc/sys/kernel/threads-max is determined by the value of memblock_estimated_nr_free_pages(). This issue was discovered on machines with CXL memory because kernel.threads-max was either smaller than expected or extremely large for the installed DRAM size. This fixes the issue by replacing memblock_reserved_size() with memblock_reserved_kern_size() that tells how much memory was reserved from the actual RAM. Suggested-by: Mike Rapoport Signed-off-by: Akinobu Mita Link: https://patch.msgid.link/20251111010010.7800-1-akinobu.mita@gmail.com Signed-off-by: Mike Rapoport (Microsoft) --- mm/memblock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index e23e16618e9b..f0f2dc66e9a2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1826,7 +1826,8 @@ phys_addr_t __init_memblock memblock_reserved_kern_size(phys_addr_t limit, int n */ unsigned long __init memblock_estimated_nr_free_pages(void) { - return PHYS_PFN(memblock_phys_mem_size() - memblock_reserved_size()); + return PHYS_PFN(memblock_phys_mem_size() - + memblock_reserved_kern_size(MEMBLOCK_ALLOC_ANYWHERE, NUMA_NO_NODE)); } /* lowest address */ From 9e805625218b70d865fcee2105dbf835d473c074 Mon Sep 17 00:00:00 2001 From: Rakuram Eswaran Date: Thu, 23 Oct 2025 20:24:32 +0530 Subject: [PATCH 0975/1075] mmc: pxamci: Simplify pxamci_probe() error handling using devm APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch refactors pxamci_probe() to use devm-managed resource allocation (e.g. devm_dma_request_chan) and dev_err_probe() for improved readability and automatic cleanup on probe failure. It also removes redundant NULL assignments and manual resource release logic from pxamci_probe(), and eliminates the corresponding release calls from pxamci_remove(). Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202510041841.pRlunIfl-lkp@intel.com/ Fixes: 58c40f3faf742c ("mmc: pxamci: Use devm_mmc_alloc_host() helper") Suggested-by: Uwe Kleine-König Signed-off-by: Rakuram Eswaran Reviewed-by: Khalid Aziz Acked-by: Uwe Kleine-König Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/pxamci.c | 56 +++++++++++++-------------------------- 1 file changed, 18 insertions(+), 38 deletions(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index 26d03352af63..b5ea058ed467 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -652,10 +652,9 @@ static int pxamci_probe(struct platform_device *pdev) host->clkrt = CLKRT_OFF; host->clk = devm_clk_get(dev, NULL); - if (IS_ERR(host->clk)) { - host->clk = NULL; - return PTR_ERR(host->clk); - } + if (IS_ERR(host->clk)) + return dev_err_probe(dev, PTR_ERR(host->clk), + "Failed to acquire clock\n"); host->clkrate = clk_get_rate(host->clk); @@ -703,46 +702,37 @@ static int pxamci_probe(struct platform_device *pdev) platform_set_drvdata(pdev, mmc); - host->dma_chan_rx = dma_request_chan(dev, "rx"); - if (IS_ERR(host->dma_chan_rx)) { - host->dma_chan_rx = NULL; + host->dma_chan_rx = devm_dma_request_chan(dev, "rx"); + if (IS_ERR(host->dma_chan_rx)) return dev_err_probe(dev, PTR_ERR(host->dma_chan_rx), "unable to request rx dma channel\n"); - } - host->dma_chan_tx = dma_request_chan(dev, "tx"); - if (IS_ERR(host->dma_chan_tx)) { - dev_err(dev, "unable to request tx dma channel\n"); - ret = PTR_ERR(host->dma_chan_tx); - host->dma_chan_tx = NULL; - goto out; - } + + host->dma_chan_tx = devm_dma_request_chan(dev, "tx"); + if (IS_ERR(host->dma_chan_tx)) + return dev_err_probe(dev, PTR_ERR(host->dma_chan_tx), + "unable to request tx dma channel\n"); if (host->pdata) { host->detect_delay_ms = host->pdata->detect_delay_ms; host->power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); - if (IS_ERR(host->power)) { - ret = PTR_ERR(host->power); - dev_err(dev, "Failed requesting gpio_power\n"); - goto out; - } + if (IS_ERR(host->power)) + return dev_err_probe(dev, PTR_ERR(host->power), + "Failed requesting gpio_power\n"); /* FIXME: should we pass detection delay to debounce? */ ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0); - if (ret && ret != -ENOENT) { - dev_err(dev, "Failed requesting gpio_cd\n"); - goto out; - } + if (ret && ret != -ENOENT) + return dev_err_probe(dev, ret, "Failed requesting gpio_cd\n"); if (!host->pdata->gpio_card_ro_invert) mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; ret = mmc_gpiod_request_ro(mmc, "wp", 0, 0); - if (ret && ret != -ENOENT) { - dev_err(dev, "Failed requesting gpio_ro\n"); - goto out; - } + if (ret && ret != -ENOENT) + return dev_err_probe(dev, ret, "Failed requesting gpio_ro\n"); + if (!ret) host->use_ro_gpio = true; @@ -759,16 +749,8 @@ static int pxamci_probe(struct platform_device *pdev) if (ret) { if (host->pdata && host->pdata->exit) host->pdata->exit(dev, mmc); - goto out; } - return 0; - -out: - if (host->dma_chan_rx) - dma_release_channel(host->dma_chan_rx); - if (host->dma_chan_tx) - dma_release_channel(host->dma_chan_tx); return ret; } @@ -791,8 +773,6 @@ static void pxamci_remove(struct platform_device *pdev) dmaengine_terminate_all(host->dma_chan_rx); dmaengine_terminate_all(host->dma_chan_tx); - dma_release_channel(host->dma_chan_rx); - dma_release_channel(host->dma_chan_tx); } } From 739f04f4a46237536aff07ff223c231da53ed8ce Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 4 Nov 2025 11:51:23 +0800 Subject: [PATCH 0976/1075] mmc: dw_mmc-rockchip: Fix wrong internal phase calculate ciu clock is 2 times of io clock, but the sample clk used is derived from io clock provided to the card. So we should use io clock to calculate the phase. Fixes: 59903441f5e4 ("mmc: dw_mmc-rockchip: Add internal phase support") Signed-off-by: Shawn Lin Acked-by: Heiko Stuebner Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-rockchip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc-rockchip.c b/drivers/mmc/host/dw_mmc-rockchip.c index 82dd906bb002..681354942e97 100644 --- a/drivers/mmc/host/dw_mmc-rockchip.c +++ b/drivers/mmc/host/dw_mmc-rockchip.c @@ -42,7 +42,7 @@ struct dw_mci_rockchip_priv_data { */ static int rockchip_mmc_get_internal_phase(struct dw_mci *host, bool sample) { - unsigned long rate = clk_get_rate(host->ciu_clk); + unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; u32 raw_value; u16 degrees; u32 delay_num = 0; @@ -85,7 +85,7 @@ static int rockchip_mmc_get_phase(struct dw_mci *host, bool sample) static int rockchip_mmc_set_internal_phase(struct dw_mci *host, bool sample, int degrees) { - unsigned long rate = clk_get_rate(host->ciu_clk); + unsigned long rate = clk_get_rate(host->ciu_clk) / RK3288_CLKGEN_DIV; u8 nineties, remainder; u8 delay_num; u32 raw_value; From 632108ec072ad64c8c83db6e16a7efee29ebfb74 Mon Sep 17 00:00:00 2001 From: Haein Lee Date: Wed, 12 Nov 2025 00:37:54 +0900 Subject: [PATCH 0977/1075] ALSA: usb-audio: Fix NULL pointer dereference in snd_usb_mixer_controls_badd In snd_usb_create_streams(), for UAC version 3 devices, the Interface Association Descriptor (IAD) is retrieved via usb_ifnum_to_if(). If this call fails, a fallback routine attempts to obtain the IAD from the next interface and sets a BADD profile. However, snd_usb_mixer_controls_badd() assumes that the IAD retrieved from usb_ifnum_to_if() is always valid, without performing a NULL check. This can lead to a NULL pointer dereference when usb_ifnum_to_if() fails to find the interface descriptor. This patch adds a NULL pointer check after calling usb_ifnum_to_if() in snd_usb_mixer_controls_badd() to prevent the dereference. This issue was discovered by syzkaller, which triggered the bug by sending a crafted USB device descriptor. Fixes: 17156f23e93c ("ALSA: usb: add UAC3 BADD profiles support") Signed-off-by: Haein Lee Link: https://patch.msgid.link/vwhzmoba9j2f.vwhzmob9u9e2.g6@dooray.com Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 6f00e0d52382..72b900505d2c 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3086,6 +3086,8 @@ static int snd_usb_mixer_controls_badd(struct usb_mixer_interface *mixer, int i; assoc = usb_ifnum_to_if(dev, ctrlif)->intf_assoc; + if (!assoc) + return -EINVAL; /* Detect BADD capture/playback channels from AS EP descriptors */ for (i = 0; i < assoc->bInterfaceCount; i++) { From d93a89684dce949c2ea817b6f07feee9a45241a7 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 10 Nov 2025 16:23:52 +0100 Subject: [PATCH 0978/1075] smb: client: let smbd_disconnect_rdma_connection() turn CREATED into DISCONNECTED When smbd_disconnect_rdma_connection() turns SMBDIRECT_SOCKET_CREATED into SMBDIRECT_SOCKET_ERROR, we'll have the situation that smbd_disconnect_rdma_work() will set SMBDIRECT_SOCKET_DISCONNECTING and call rdma_disconnect(), which likely fails as we never reached the RDMA_CM_EVENT_ESTABLISHED. it means that wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED) in smbd_destroy() will hang forever in SMBDIRECT_SOCKET_DISCONNECTING never reaching SMBDIRECT_SOCKET_DISCONNECTED. So we directly go from SMBDIRECT_SOCKET_CREATED to SMBDIRECT_SOCKET_DISCONNECTED. Fixes: ffbfc73e84eb ("smb: client: let smbd_disconnect_rdma_connection() set SMBDIRECT_SOCKET_ERROR...") Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 85a4c55b61b8..c6c428c2e08d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -290,6 +290,9 @@ static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) break; case SMBDIRECT_SOCKET_CREATED: + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + case SMBDIRECT_SOCKET_CONNECTED: sc->status = SMBDIRECT_SOCKET_ERROR; break; From fdf302e6bea1822a9144a0cc2e8e17527e746162 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 10 Nov 2025 14:19:13 +0100 Subject: [PATCH 0979/1075] gendwarfksyms: Skip files with no exports Starting with Rust 1.91.0 (released 2025-10-30), in upstream commit ab91a63d403b ("Ignore intrinsic calls in cross-crate-inlining cost model") [1][2], `bindings.o` stops containing DWARF debug information because the `Default` implementations contained `write_bytes()` calls which are now ignored in that cost model (note that `CLIPPY=1` does not reproduce it). This means `gendwarfksyms` complains: RUSTC L rust/bindings.o error: gendwarfksyms: process_module: dwarf_get_units failed: no debugging information? There are several alternatives that would work here: conditionally skipping in the cases needed (but that is subtle and brittle), forcing DWARF generation with e.g. a dummy `static` (ugly and we may need to do it in several crates), skipping the call to the tool in the Kbuild command when there are no exports (fine) or teaching the tool to do so itself (simple and clean). Thus do the last one: don't attempt to process files if we have no symbol versions to calculate. [ I used the commit log of my patch linked below since it explained the root issue and expanded it a bit more to summarize the alternatives. - Miguel ] Cc: stable@vger.kernel.org # Needed in 6.17.y. Reported-by: Haiyue Wang Closes: https://lore.kernel.org/rust-for-linux/b8c1c73d-bf8b-4bf2-beb1-84ffdcd60547@163.com/ Suggested-by: Miguel Ojeda Link: https://lore.kernel.org/rust-for-linux/CANiq72nKC5r24VHAp9oUPR1HVPqT+=0ab9N0w6GqTF-kJOeiSw@mail.gmail.com/ Link: https://github.com/rust-lang/rust/commit/ab91a63d403b0105cacd72809cd292a72984ed99 [1] Link: https://github.com/rust-lang/rust/pull/145910 [2] Signed-off-by: Sami Tolvanen Tested-by: Haiyue Wang Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20251110131913.1789896-1-ojeda@kernel.org Signed-off-by: Miguel Ojeda --- scripts/gendwarfksyms/gendwarfksyms.c | 3 ++- scripts/gendwarfksyms/gendwarfksyms.h | 2 +- scripts/gendwarfksyms/symbols.c | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c index 08ae61eb327e..f5203d1640ee 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.c +++ b/scripts/gendwarfksyms/gendwarfksyms.c @@ -138,7 +138,8 @@ int main(int argc, char **argv) error("no input files?"); } - symbol_read_exports(stdin); + if (!symbol_read_exports(stdin)) + return 0; if (symtypes_file) { symfile = fopen(symtypes_file, "w"); diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h index d9c06d2cb1df..32cec8f7695a 100644 --- a/scripts/gendwarfksyms/gendwarfksyms.h +++ b/scripts/gendwarfksyms/gendwarfksyms.h @@ -123,7 +123,7 @@ struct symbol { typedef void (*symbol_callback_t)(struct symbol *, void *arg); bool is_symbol_ptr(const char *name); -void symbol_read_exports(FILE *file); +int symbol_read_exports(FILE *file); void symbol_read_symtab(int fd); struct symbol *symbol_get(const char *name); void symbol_set_ptr(struct symbol *sym, Dwarf_Die *ptr); diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c index 35ed594f0749..ecddcb5ffcdf 100644 --- a/scripts/gendwarfksyms/symbols.c +++ b/scripts/gendwarfksyms/symbols.c @@ -128,7 +128,7 @@ static bool is_exported(const char *name) return for_each(name, NULL, NULL) > 0; } -void symbol_read_exports(FILE *file) +int symbol_read_exports(FILE *file) { struct symbol *sym; char *line = NULL; @@ -159,6 +159,8 @@ void symbol_read_exports(FILE *file) free(line); debug("%d exported symbols", nsym); + + return nsym; } static void get_symbol(struct symbol *sym, void *arg) From bb8336a5163a5839476f27ed1ad69df4a19e13ca Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Mon, 10 Nov 2025 18:25:45 +0000 Subject: [PATCH 0980/1075] ethtool: fix incorrect kernel-doc style comment in ethtool.h Building documentation produced the following warning: WARNING: ./include/linux/ethtool.h:495 This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for This comment was not intended to be parsed as kernel-doc, so replace the '/**' with '/*' to silence the warning and align with normal comment style in header files. No functional changes. Signed-off-by: Kriish Sharma Link: https://patch.msgid.link/20251110182545.2112596-1-kriish.sharma2006@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c2d8b4ec62eb..5c9162193d26 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -492,7 +492,7 @@ struct ethtool_pause_stats { }; #define ETHTOOL_MAX_LANES 8 -/** +/* * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for * the end-of-list marker, total 17 items */ From 63c643aa7b7287fdbb0167063785f89ece3f000f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:40 +0100 Subject: [PATCH 0981/1075] selftests: mptcp: connect: fix fallback note due to OoO The "fallback due to TCP OoO" was never printed because the stat_ooo_now variable was checked twice: once in the parent if-statement, and one in the child one. The second condition was then always true then, and the 'else' branch was never taken. The idea is that when there are more ACK + MP_CAPABLE than expected, the test either fails if there was no out of order packets, or a notice is printed. Fixes: 69ca3d29a755 ("mptcp: update selftest for fallback due to OoO") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-1-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 47ecb5b3836e..9b7b93f8eb0c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -492,7 +492,7 @@ do_transfer() "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then if [ ${stat_ooo_now} -eq 0 ]; then mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ "than expected (${expect_ackrx})" From aea73bae662a0e184393d6d7d0feb18d2577b9b9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:41 +0100 Subject: [PATCH 0982/1075] selftests: mptcp: join: rm: set backup flag Some of these 'remove' tests rarely fail because a subflow has been reset instead of cleanly removed. This can happen when one extra subflow which has never carried data is being closed (FIN) on one side, while the other is sending data for the first time. To avoid such subflows to be used right at the end, the backup flag has been added. With that, data will be only carried on the initial subflow. Fixes: d2c4333a801c ("selftests: mptcp: add testcases for removing addrs") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-2-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 78a1aa4ecff2..9ed9ec7202d6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2532,7 +2532,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2545,8 +2545,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2557,7 +2557,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2570,9 +2570,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2584,10 +2584,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2599,9 +2599,9 @@ remove_tests() # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2614,10 +2614,10 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2631,10 +2631,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2647,9 +2647,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2666,9 +2666,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2681,9 +2681,9 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 From 6457595db9870298ee30b6d75287b8548e33fe19 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:42 +0100 Subject: [PATCH 0983/1075] selftests: mptcp: join: endpoints: longer transfer In rare cases, when the test environment is very slow, some userspace tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from avoid it to end before the end of the verifications To play it safe, all endpoints tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd") Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-3-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ed9ec7202d6..97af8d89ac5c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3943,7 +3943,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { speed=slow \ + { test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -3970,7 +3970,7 @@ endpoint_tests() pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4048,7 +4048,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4121,7 +4121,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=4 speed=20 \ + { test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! From 290493078b96ce2ce3e60f55c23654acb678042a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:43 +0100 Subject: [PATCH 0984/1075] selftests: mptcp: join: userspace: longer transfer In rare cases, when the test environment is very slow, some userspace tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from avoid it to end before the end of the verifications To play it safe, all userspace tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-4-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 97af8d89ac5c..01273abfdc89 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3806,7 +3806,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3839,7 +3839,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3867,7 +3867,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3888,7 +3888,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3912,7 +3912,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 From ee79980f7a428ec299f6261bea4c1084dcbc9631 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:44 +0100 Subject: [PATCH 0985/1075] selftests: mptcp: connect: trunc: read all recv data MPTCP Join "fastclose server" selftest is sometimes failing because the client output file doesn't have the expected size, e.g. 296B instead of 1024B. When looking at a packet trace when this happens, the server sent the expected 1024B in two parts -- 100B, then 924B -- then the MP_FASTCLOSE. It is then strange to see the client only receiving 296B, which would mean it only got a part of the second packet. The problem is then not on the networking side, but rather on the data reception side. When mptcp_connect is launched with '-f -1', it means the connection might stop before having sent everything, because a reset has been received. When this happens, the program was directly stopped. But it is also possible there are still some data to read, simply because the previous 'read' step was done with a buffer smaller than the pending data, see do_rnd_read(). In this case, it is important to read what's left in the kernel buffers before stopping without error like before. SIGPIPE is now ignored, not to quit the app before having read everything. Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-5-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/net/mptcp/mptcp_connect.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index b148cadb96d0..fc7e22b503d3 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -1433,7 +1441,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': From 852b644acbce1529307a4bb283752c4e77b5cda7 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:45 +0100 Subject: [PATCH 0986/1075] selftests: mptcp: join: properly kill background tasks The 'run_tests' function is executed in the background, but killing its associated PID would not kill the children tasks running in the background. To properly kill all background tasks, 'kill -- -PID' could be used, but this requires kill from procps-ng. Instead, all children tasks are listed using 'ps', and 'kill' is called with all PIDs of this group. Fixes: 31ee4ad86afd ("selftests: mptcp: join: stop transfer when check is done (part 1)") Cc: stable@vger.kernel.org Fixes: 04b57c9e096a ("selftests: mptcp: join: stop transfer when check is done (part 2)") Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-6-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/mptcp/mptcp_join.sh | 18 ++++++++-------- .../testing/selftests/net/mptcp/mptcp_lib.sh | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 01273abfdc89..41503c241989 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3831,7 +3831,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -3859,7 +3859,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3880,7 +3880,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -3904,7 +3904,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3930,7 +3930,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi } @@ -3960,7 +3960,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && @@ -4015,7 +4015,7 @@ endpoint_tests() chk_mptcp_info subflows 3 subflows 3 done - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4089,7 +4089,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4137,7 +4137,7 @@ endpoint_tests() wait_mpj $ns2 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ chk_join_nr 2 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d62e653d48b0..f4388900016a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -350,6 +350,27 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] From d58041d2c63e09a1c9083e0e9f4151e487c4e16a Mon Sep 17 00:00:00 2001 From: Magnus Lindholm Date: Tue, 4 Nov 2025 11:33:43 +0100 Subject: [PATCH 0987/1075] MAINTAINERS: Add Magnus Lindholm as maintainer for alpha port Acked-by: John Paul Adrian Glaubitz Acked-by: Matt Turner Signed-off-by: Magnus Lindholm Signed-off-by: Matt Turner --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index f0c8b85baa6b..69e27908f74c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -915,6 +915,7 @@ F: drivers/staging/media/sunxi/cedrus/ ALPHA PORT M: Richard Henderson M: Matt Turner +M: Magnus Lindholm L: linux-alpha@vger.kernel.org S: Odd Fixes F: arch/alpha/ From 0345552a653ce5542affeb69ac5aa52177a5199b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 9 Nov 2025 16:12:15 +0000 Subject: [PATCH 0988/1075] net_sched: limit try_bulk_dequeue_skb() batches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 100dfa74cad9 ("inet: dev_queue_xmit() llist adoption") I started seeing many qdisc requeues on IDPF under high TX workload. $ tc -s qd sh dev eth1 handle 1: ; sleep 1; tc -s qd sh dev eth1 handle 1: qdisc mq 1: root Sent 43534617319319 bytes 268186451819 pkt (dropped 0, overlimits 0 requeues 3532840114) backlog 1056Kb 6675p requeues 3532840114 qdisc mq 1: root Sent 43554665866695 bytes 268309964788 pkt (dropped 0, overlimits 0 requeues 3537737653) backlog 781164b 4822p requeues 3537737653 This is caused by try_bulk_dequeue_skb() being only limited by BQL budget. perf record -C120-239 -e qdisc:qdisc_dequeue sleep 1 ; perf script ... netperf 75332 [146] 2711.138269: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1292 skbaddr=0xff378005a1e9f200 netperf 75332 [146] 2711.138953: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1213 skbaddr=0xff378004d607a500 netperf 75330 [144] 2711.139631: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1233 skbaddr=0xff3780046be20100 netperf 75333 [147] 2711.140356: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1093 skbaddr=0xff37800514845b00 netperf 75337 [151] 2711.141037: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1353 skbaddr=0xff37800460753300 netperf 75337 [151] 2711.141877: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1367 skbaddr=0xff378004e72c7b00 netperf 75330 [144] 2711.142643: qdisc:qdisc_dequeue: dequeue ifindex=5 qdisc handle=0x80150000 parent=0x10013 txq_state=0x0 packets=1202 skbaddr=0xff3780045bd60000 ... This is bad because : 1) Large batches hold one victim cpu for a very long time. 2) Driver often hit their own TX ring limit (all slots are used). 3) We call dev_requeue_skb() 4) Requeues are using a FIFO (q->gso_skb), breaking qdisc ability to implement FQ or priority scheduling. 5) dequeue_skb() gets packets from q->gso_skb one skb at a time with no xmit_more support. This is causing many spinlock games between the qdisc and the device driver. Requeues were supposed to be very rare, lets keep them this way. Limit batch sizes to /proc/sys/net/core/dev_weight (default 64) as __qdisc_run() was designed to use. Fixes: 5772e9a3463b ("qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE") Signed-off-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20251109161215.2574081-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_generic.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 1e008a228ebd..7dee9748a56b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -180,9 +180,10 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) static void try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, const struct netdev_queue *txq, - int *packets) + int *packets, int budget) { int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; + int cnt = 0; while (bytelimit > 0) { struct sk_buff *nskb = q->dequeue(q); @@ -193,8 +194,10 @@ static void try_bulk_dequeue_skb(struct Qdisc *q, bytelimit -= nskb->len; /* covers GSO len */ skb->next = nskb; skb = nskb; - (*packets)++; /* GSO counts as one pkt */ + if (++cnt >= budget) + break; } + (*packets) += cnt; skb_mark_not_on_list(skb); } @@ -228,7 +231,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q, * A requeued skb (via q->gso_skb) can also be a SKB list. */ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, - int *packets) + int *packets, int budget) { const struct netdev_queue *txq = q->dev_queue; struct sk_buff *skb = NULL; @@ -295,7 +298,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, if (skb) { bulk: if (qdisc_may_bulk(q)) - try_bulk_dequeue_skb(q, skb, txq, packets); + try_bulk_dequeue_skb(q, skb, txq, packets, budget); else try_bulk_dequeue_skb_slow(q, skb, packets); } @@ -387,7 +390,7 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, * >0 - queue is not empty. * */ -static inline bool qdisc_restart(struct Qdisc *q, int *packets) +static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget) { spinlock_t *root_lock = NULL; struct netdev_queue *txq; @@ -396,7 +399,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets) bool validate; /* Dequeue packet */ - skb = dequeue_skb(q, &validate, packets); + skb = dequeue_skb(q, &validate, packets, budget); if (unlikely(!skb)) return false; @@ -414,7 +417,7 @@ void __qdisc_run(struct Qdisc *q) int quota = READ_ONCE(net_hotdata.dev_tx_weight); int packets; - while (qdisc_restart(q, &packets)) { + while (qdisc_restart(q, &packets, quota)) { quota -= packets; if (quota <= 0) { if (q->flags & TCQ_F_NOLOCK) From 22a36e660d014925114feb09a2680bb3c2d1e279 Mon Sep 17 00:00:00 2001 From: Vitaly Prosyak Date: Thu, 6 Nov 2025 12:35:53 -0500 Subject: [PATCH 0989/1075] drm/amdgpu: disable peer-to-peer access for DCC-enabled GC12 VRAM surfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain multi-GPU configurations (especially GFX12) may hit data corruption when a DCC-compressed VRAM surface is shared across GPUs using peer-to-peer (P2P) DMA transfers. Such surfaces rely on device-local metadata and cannot be safely accessed through a remote GPU’s page tables. Attempting to import a DCC-enabled surface through P2P leads to incorrect rendering or GPU faults. This change disables P2P for DCC-enabled VRAM buffers that are contiguous and allocated on GFX12+ hardware. In these cases, the importer falls back to the standard system-memory path, avoiding invalid access to compressed surfaces. Future work could consider optional migration (VRAM→System→VRAM) if a performance regression is observed when `attach->peer2peer = false`. Tested on: - Dual RX 9700 XT (Navi4x) setup - GNOME and Wayland compositor scenarios - Confirmed no corruption after disabling P2P under these conditions v2: Remove check TTM_PL_VRAM & TTM_PL_FLAG_CONTIGUOUS. v3: simplify for upsteam and fix ip version check (Alex) Suggested-by: Christian König Signed-off-by: Vitaly Prosyak Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit 9dff2bb709e6fbd97e263fd12bf12802d2b5a0cf) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8561ad7f6180..ed3bef1edfe4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -82,6 +82,18 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* + * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+. + * Such buffers cannot be safely accessed over P2P due to device-local + * compression metadata. Fallback to system-memory path instead. + * Device supports GFX12 (GC 12.x or newer) + * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag + * + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0) && + bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + attach->peer2peer = false; + if (!amdgpu_dmabuf_is_xgmi_accessible(attach_adev, bo) && pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; From 9f8fd538e244b87e4556833da51ddd986f50cc81 Mon Sep 17 00:00:00 2001 From: Pierre-Eric Pelloux-Prayer Date: Tue, 4 Nov 2025 10:42:45 +0100 Subject: [PATCH 0990/1075] drm/amdgpu: jump to the correct label on failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_sched_entity_init wasn't called yet, so the only thing to do is to release allocated memory. This doesn't fix any bug since entity is zero allocated and drm_sched_entity_fini does nothing in this case. Signed-off-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Tvrtko Ursulin Acked-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit ec49374ccb8da86b465beaf09c367f3dfd648d8f) --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index f5d5c45ddc0d..afedea02188d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -236,7 +236,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, r = amdgpu_xcp_select_scheds(adev, hw_ip, hw_prio, fpriv, &num_scheds, &scheds); if (r) - goto cleanup_entity; + goto error_free_entity; } /* disable load balance if the hw engine retains context among dependent jobs */ From 6623c5f9fd877868fba133b4ae4dab0052e82dad Mon Sep 17 00:00:00 2001 From: "Jesse.Zhang" Date: Fri, 24 Oct 2025 16:09:25 +0800 Subject: [PATCH 0991/1075] drm/amdgpu: fix lock warning in amdgpu_userq_fence_driver_process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a potential deadlock caused by inconsistent spinlock usage between interrupt and process contexts in the userq fence driver. The issue occurs when amdgpu_userq_fence_driver_process() is called from both: - Interrupt context: gfx_v11_0_eop_irq() -> amdgpu_userq_fence_driver_process() - Process context: amdgpu_eviction_fence_suspend_worker() -> amdgpu_userq_fence_driver_force_completion() -> amdgpu_userq_fence_driver_process() In interrupt context, the spinlock was acquired without disabling interrupts, leaving it in {IN-HARDIRQ-W} state. When the same lock is acquired in process context, the kernel detects inconsistent locking since the process context acquisition would enable interrupts while holding a lock previously acquired in interrupt context. Kernel log shows: [ 4039.310790] inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. [ 4039.310804] kworker/7:2/409 [HC0[0]:SC0[0]:HE1:SE1] takes: [ 4039.310818] ffff9284e1bed000 (&fence_drv->fence_list_lock){?...}-{3:3}, [ 4039.310993] {IN-HARDIRQ-W} state was registered at: [ 4039.311004] lock_acquire+0xc6/0x300 [ 4039.311018] _raw_spin_lock+0x39/0x80 [ 4039.311031] amdgpu_userq_fence_driver_process.part.0+0x30/0x180 [amdgpu] [ 4039.311146] amdgpu_userq_fence_driver_process+0x17/0x30 [amdgpu] [ 4039.311257] gfx_v11_0_eop_irq+0x132/0x170 [amdgpu] Fix by using spin_lock_irqsave()/spin_unlock_irqrestore() to properly manage interrupt state regardless of calling context. Reviewed-by: Christian König Signed-off-by: Jesse Zhang Signed-off-by: Alex Deucher (cherry picked from commit ded3ad780cf97a04927773c4600823b84f7f3cc2) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c index 761bad98da3e..4d0096d0baa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c @@ -151,15 +151,16 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d { struct amdgpu_userq_fence *userq_fence, *tmp; struct dma_fence *fence; + unsigned long flags; u64 rptr; int i; if (!fence_drv) return; + spin_lock_irqsave(&fence_drv->fence_list_lock, flags); rptr = amdgpu_userq_fence_read(fence_drv); - spin_lock(&fence_drv->fence_list_lock); list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences, link) { fence = &userq_fence->base; @@ -174,7 +175,7 @@ void amdgpu_userq_fence_driver_process(struct amdgpu_userq_fence_driver *fence_d list_del(&userq_fence->link); dma_fence_put(fence); } - spin_unlock(&fence_drv->fence_list_lock); + spin_unlock_irqrestore(&fence_drv->fence_list_lock, flags); } void amdgpu_userq_fence_driver_destroy(struct kref *ref) From 33c995709121a3a29d4567a08c943bf7a5b24b78 Mon Sep 17 00:00:00 2001 From: Ivan Lipski Date: Thu, 23 Oct 2025 10:03:59 -0400 Subject: [PATCH 0992/1075] drm/amd/display: Allow VRR params change if unsynced with the stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] When changing resolution (e.g., 4K → FHD) in mirror/clone mode with certain monitors, the monitor blanks and loses connection due to an early exit in vrr_settings_require_update(). The function only checks if VRR state, fixed refresh target, or min/max refresh rate range has changed. During mode changes, if the calculated min/max refresh values remain the same even though the stream's v_total changed, the function returns early without updating vrr_params.adjust.v_total_min/max, leaving the monitor's VRR timing parameters unsynced with the new mode, causing it to blank out. [How] Explicitly adjust VRR parameters to the stream's nominal v_total when VRR is supported, but inactive. Fixes: 6d31602a9f57 ("drm/amd/display: more liberal vmin/vmax update for freesync") Reviewed-by: Aurabindo Pillai Signed-off-by: Ivan Lipski Signed-off-by: Fangzhi Zuo Tested-by: Dan Wheeler Signed-off-by: Alex Deucher (cherry picked from commit 607df8248a011524211ee34850345305a1913f9e) --- .../gpu/drm/amd/display/modules/freesync/freesync.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index ce421bcddcb0..1aae46d703ba 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -1260,6 +1260,17 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, update_v_total_for_static_ramp( core_freesync, stream, in_out_vrr); } + + /* + * If VRR is inactive, set vtotal min and max to nominal vtotal + */ + if (in_out_vrr->state == VRR_STATE_INACTIVE) { + in_out_vrr->adjust.v_total_min = + mod_freesync_calc_v_total_from_refresh(stream, + in_out_vrr->max_refresh_in_uhz); + in_out_vrr->adjust.v_total_max = in_out_vrr->adjust.v_total_min; + return; + } } unsigned long long mod_freesync_calc_nominal_field_rate( From 7132f7e025f9382157543dd86a62d161335b48b9 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 7 Nov 2025 13:07:13 -0500 Subject: [PATCH 0993/1075] drm/amd/amdgpu: Ensure isp_kernel_buffer_alloc() creates a new BO When the BO pointer provided to amdgpu_bo_create_kernel() points to non-NULL, amdgpu_bo_create_kernel() takes it as a hint to pin that address rather than allocate a new BO. This functionality is never desired for allocating ISP buffers. A new BO should always be created when isp_kernel_buffer_alloc() is called, per the description for isp_kernel_buffer_alloc(). Ensure this by zeroing *bo right before the amdgpu_bo_create_kernel() call. Fixes: 55d42f616976 ("drm/amd/amdgpu: Add helper functions for isp buffers") Reviewed-by: Mario Limonciello (AMD) Reviewed-by: Pratap Nirujogi Signed-off-by: Sultan Alsawaf Signed-off-by: Alex Deucher (cherry picked from commit 73c8c29baac7f0c7e703d92eba009008cbb5228e) --- drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 9cddbf50442a..37270c4dab8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -280,6 +280,8 @@ int isp_kernel_buffer_alloc(struct device *dev, u64 size, if (ret) return ret; + /* Ensure *bo is NULL so a new BO will be created */ + *bo = NULL; ret = amdgpu_bo_create_kernel(adev, size, ISP_MC_ADDR_ALIGN, From bbe3c115030da431c9ec843c18d5583e59482dd2 Mon Sep 17 00:00:00 2001 From: Sathishkumar S Date: Tue, 7 Oct 2025 13:17:51 +0530 Subject: [PATCH 0994/1075] drm/amdgpu/jpeg: Add parse_cs for JPEG5_0_1 enable parse_cs callback for JPEG5_0_1. Signed-off-by: Sathishkumar S Reviewed-by: Leo Liu Signed-off-by: Alex Deucher (cherry picked from commit 547985579932c1de13f57f8bcf62cd9361b9d3d3) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c index baf097d2e1ac..ab0bf880d3d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -878,6 +878,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .parse_cs = amdgpu_jpeg_dec_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + From d15deafab5d722afb9e2f83c5edcdef9d9d98bd1 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 6 Nov 2025 10:17:06 -0500 Subject: [PATCH 0995/1075] drm/amdkfd: relax checks for over allocation of save area Over allocation of save area is not fatal, only under allocation is. ROCm has various components that independently claim authority over save area size. Unless KFD decides to claim single authority, relax size checks. Signed-off-by: Jonathan Kim Reviewed-by: Philip Yang Signed-off-by: Alex Deucher (cherry picked from commit 15bd4958fe38e763bc17b607ba55155254a01f55) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a65c67cf56ff..f1e7583650c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -297,16 +297,16 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope goto out_err_unreserve; } - if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { - pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", + if (properties->ctx_save_restore_area_size < topo_dev->node_props.cwsr_size) { + pr_debug("queue cwsr size 0x%x not sufficient for node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); err = -EINVAL; goto out_err_unreserve; } - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, @@ -352,8 +352,8 @@ int kfd_queue_release_buffers(struct kfd_process_device *pdd, struct queue_prope topo_dev = kfd_topology_device_by_id(pdd->dev->id); if (!topo_dev) return -EINVAL; - total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) - * NUM_XCC(pdd->dev->xcc_mask); + total_cwsr_size = (properties->ctx_save_restore_area_size + + topo_dev->node_props.debug_memory_size) * NUM_XCC(pdd->dev->xcc_mask); total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); kfd_queue_buffer_svm_put(pdd, properties->ctx_save_restore_area_address, total_cwsr_size); From eac32ff42393efa6657efc821231b8d802c1d485 Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Tue, 28 Oct 2025 14:37:07 -0400 Subject: [PATCH 0996/1075] drm/amdkfd: Fix GPU mappings for APU after prefetch Fix the following corner case:- Consider a 2M huge page SVM allocation, followed by prefetch call for the first 4K page. The whole range is initially mapped with single PTE. After the prefetch, this range gets split to first page + rest of the pages. Currently, the first page mapping is not updated on MI300A (APU) since page hasn't migrated. However, after range split PTE mapping it not valid. Fix this by forcing page table update for the whole range when prefetch is called. Calling prefetch on APU doesn't improve performance. If all it deteriotes. However, functionality has to be supported. v2: Use apu_prefer_gtt as this issue doesn't apply to APUs with carveout VRAM v3: Simplify by setting the flag for all ASICs as it doesn't affect dGPU v4: Remove v2 and v3 changes. Force update_mapping when range is split at a size that is not aligned to prange granularity Suggested-by: Philip Yang Signed-off-by: Harish Kasiviswanathan Reviewed-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher (cherry picked from commit 076470b9f6f8d9c7c8ca73a9f054942a686f9ba7) --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9d72411c3379..74a1d3e1d52b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3687,6 +3687,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, svm_range_apply_attrs(p, prange, nattr, attrs, &update_mapping); /* TODO: unmap ranges from GPU that lost access */ } + update_mapping |= !p->xnack_enabled && !list_empty(&remap_list); + list_for_each_entry_safe(prange, next, &remove_list, update_list) { pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, From f4c737d44969c38dac2478039d353edddffd120d Mon Sep 17 00:00:00 2001 From: Junjie Cao Date: Thu, 16 Oct 2025 09:49:19 +0800 Subject: [PATCH 0997/1075] wifi: iwlwifi: fix aux ROC time event iterator usage The list_for_each_entry() iterator must not be used outside the loop. Even though we break and check for NULL, doing so still violates kernel iteration rules and triggers Coccinelle's use_after_iter.cocci warning. Cache the matched entry in aux_roc_te and use it consistently after the loop. This follows iterator best practices, resolves the warning, and makes the code more maintainable. Signed-off-by: Junjie Cao Link: https://patch.msgid.link/20251016014919.383565-1-junjie.cao@intel.com Signed-off-by: Miri Korenblit --- .../net/wireless/intel/iwlwifi/mvm/time-event.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 0c9c2492d8a7..0b12ee8ad618 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -463,7 +463,7 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, if (!aux_roc_te) /* Not a Aux ROC time event */ return -EINVAL; - iwl_mvm_te_check_trigger(mvm, notif, te_data); + iwl_mvm_te_check_trigger(mvm, notif, aux_roc_te); IWL_DEBUG_TE(mvm, "Aux ROC time event notification - UID = 0x%x action %d (error = %d)\n", @@ -475,14 +475,14 @@ static int iwl_mvm_aux_roc_te_handle_notif(struct iwl_mvm *mvm, /* End TE, notify mac80211 */ ieee80211_remain_on_channel_expired(mvm->hw); iwl_mvm_roc_finished(mvm); /* flush aux queue */ - list_del(&te_data->list); /* remove from list */ - te_data->running = false; - te_data->vif = NULL; - te_data->uid = 0; - te_data->id = TE_MAX; + list_del(&aux_roc_te->list); /* remove from list */ + aux_roc_te->running = false; + aux_roc_te->vif = NULL; + aux_roc_te->uid = 0; + aux_roc_te->id = TE_MAX; } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); - te_data->running = true; + aux_roc_te->running = true; ieee80211_ready_on_channel(mvm->hw); /* Start TE */ } else { IWL_DEBUG_TE(mvm, From 3592c0083fb29cca13cd9978b8844d58b4eff548 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Oct 2025 11:20:44 +0200 Subject: [PATCH 0998/1075] wifi: iwlwifi: mvm: fix beacon template/fixed rate During the development of the rate changes, I evidently made some changes that shouldn't have been there; beacon templates with rate_n_flags are only in old versions, so no changes to them should have been necessary, and evidently broke on some devices. This also would have broken fixed (injection) rates, it would seem. Restore the old handling of this. Fixes: dabc88cb3b78 ("wifi: iwlwifi: handle v3 rates") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220558 Reviewed-by: Benjamin Berg Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20251008112044.3bb8ea849d8d.I90f4d2b2c1f62eaedaf304a61d2ab9e50c491c2d@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c | 13 +++---------- drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 9c9e0e1c6e1d..867807abde66 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -938,19 +938,12 @@ u8 iwl_mvm_mac_ctxt_get_lowest_rate(struct iwl_mvm *mvm, u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx) { + u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10; - u16 flags, cck_flag; - - if (is_new_rate) { - flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); - cck_flag = IWL_MAC_BEACON_CCK; - } else { - cck_flag = IWL_MAC_BEACON_CCK_V1; - flags = iwl_fw_rate_idx_to_plcp(rate_idx); - } if (rate_idx <= IWL_LAST_CCK_RATE) - flags |= cck_flag; + flags |= is_new_rate ? IWL_MAC_BEACON_CCK + : IWL_MAC_BEACON_CCK_V1; return flags; } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 22602c32faa5..fa995e235d9b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -159,9 +159,15 @@ int iwl_mvm_legacy_rate_to_mac80211_idx(u32 rate_n_flags, u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx) { - return (rate_idx >= IWL_FIRST_OFDM_RATE ? - rate_idx - IWL_FIRST_OFDM_RATE : - rate_idx); + if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) + /* In the new rate legacy rates are indexed: + * 0 - 3 for CCK and 0 - 7 for OFDM. + */ + return (rate_idx >= IWL_FIRST_OFDM_RATE ? + rate_idx - IWL_FIRST_OFDM_RATE : + rate_idx); + + return iwl_fw_rate_idx_to_plcp(rate_idx); } u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac) From 1a222625b468effd13d1ebb662c36a41c28a835a Mon Sep 17 00:00:00 2001 From: Miri Korenblit Date: Mon, 10 Nov 2025 14:57:00 +0200 Subject: [PATCH 0999/1075] wifi: iwlwifi: mld: always take beacon ies in link grading One of the factors of a link's grade is the channel load, which is calculated from the AP's bss load element. The current code takes this element from the beacon for an active link, and from bss->ies for an inactive link. bss->ies is set to either the beacon's ies or to the probe response ones, with preference to the probe response (meaning that if there was even one probe response, the ies of it will be stored in bss->ies and won't be overiden by the beacon ies). The probe response can be very old, i.e. from the connection time, where a beacon is updated before each link selection (which is triggered only after a passive scan). In such case, the bss load element in the probe response will not include the channel load caused by the STA, where the beacon will. This will cause the inactive link to always have a lower channel load, and therefore an higher grade than the active link's one. This causes repeated link switches, causing the throughput to drop. Fix this by always taking the ies from the beacon, as those are for sure new. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Reviewed-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251110145652.b493dbb1853a.I058ba7309c84159f640cc9682d1bda56dd56a536@changeid Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 60d814bf5779..f6f52d297a72 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -708,18 +708,13 @@ static int iwl_mld_get_chan_load_from_element(struct iwl_mld *mld, struct ieee80211_bss_conf *link_conf) { - struct ieee80211_vif *vif = link_conf->vif; const struct cfg80211_bss_ies *ies; const struct element *bss_load_elem = NULL; const struct ieee80211_bss_load_elem *bss_load; guard(rcu)(); - if (ieee80211_vif_link_active(vif, link_conf->link_id)) - ies = rcu_dereference(link_conf->bss->beacon_ies); - else - ies = rcu_dereference(link_conf->bss->ies); - + ies = rcu_dereference(link_conf->bss->beacon_ies); if (ies) bss_load_elem = cfg80211_find_elem(WLAN_EID_QBSS_LOAD, ies->data, ies->len); From a3f8f8662771285511ae26c4c8d3ba1cd22159b9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 5 Nov 2025 14:39:45 +0100 Subject: [PATCH 1000/1075] power: always freeze efivarfs The efivarfs filesystems must always be frozen and thawed to resync variable state. Make it so. Link: https://patch.msgid.link/20251105-vorbild-zutreffen-fe00d1dd98db@brauner Signed-off-by: Christian Brauner --- fs/efivarfs/super.c | 1 + fs/super.c | 13 ++++++++++--- include/linux/fs.h | 3 ++- kernel/power/hibernate.c | 9 +++------ kernel/power/suspend.c | 3 +-- 5 files changed, 17 insertions(+), 12 deletions(-) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 1f4d8ce56667..6de97565d5f7 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -533,6 +533,7 @@ static struct file_system_type efivarfs_type = { .init_fs_context = efivarfs_init_fs_context, .kill_sb = efivarfs_kill_sb, .parameters = efivarfs_parameters, + .fs_flags = FS_POWER_FREEZE, }; static __init int efivarfs_init(void) diff --git a/fs/super.c b/fs/super.c index 5bab94fb7e03..277b84e5c279 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1183,11 +1183,14 @@ static inline bool get_active_super(struct super_block *sb) static const char *filesystems_freeze_ptr = "filesystems_freeze"; -static void filesystems_freeze_callback(struct super_block *sb, void *unused) +static void filesystems_freeze_callback(struct super_block *sb, void *freeze_all_ptr) { if (!sb->s_op->freeze_fs && !sb->s_op->freeze_super) return; + if (freeze_all_ptr && !(sb->s_type->fs_flags & FS_POWER_FREEZE)) + return; + if (!get_active_super(sb)) return; @@ -1201,9 +1204,13 @@ static void filesystems_freeze_callback(struct super_block *sb, void *unused) deactivate_super(sb); } -void filesystems_freeze(void) +void filesystems_freeze(bool freeze_all) { - __iterate_supers(filesystems_freeze_callback, NULL, + void *freeze_all_ptr = NULL; + + if (freeze_all) + freeze_all_ptr = &freeze_all; + __iterate_supers(filesystems_freeze_callback, freeze_all_ptr, SUPER_ITER_UNLOCKED | SUPER_ITER_REVERSE); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 3ea98c6cce81..249a1da8440e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2689,6 +2689,7 @@ struct file_system_type { #define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */ #define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_LBS 128 /* FS supports LBS */ +#define FS_POWER_FREEZE 256 /* Always freeze on suspend/hibernate */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; @@ -3606,7 +3607,7 @@ extern void drop_super_exclusive(struct super_block *sb); extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); -void filesystems_freeze(void); +void filesystems_freeze(bool freeze_all); void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 14e85ff23551..1f250ce036a0 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -825,8 +825,7 @@ int hibernate(void) goto Restore; ksys_sync_helper(); - if (filesystem_freeze_enabled) - filesystems_freeze(); + filesystems_freeze(filesystem_freeze_enabled); error = freeze_processes(); if (error) @@ -932,8 +931,7 @@ int hibernate_quiet_exec(int (*func)(void *data), void *data) if (error) goto restore; - if (filesystem_freeze_enabled) - filesystems_freeze(); + filesystems_freeze(filesystem_freeze_enabled); error = freeze_processes(); if (error) @@ -1083,8 +1081,7 @@ static int software_resume(void) if (error) goto Restore; - if (filesystem_freeze_enabled) - filesystems_freeze(); + filesystems_freeze(filesystem_freeze_enabled); pm_pr_dbg("Preparing processes for hibernation restore.\n"); error = freeze_processes(); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 4bb4686c1c08..c933a63a9718 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -375,8 +375,7 @@ static int suspend_prepare(suspend_state_t state) if (error) goto Restore; - if (filesystem_freeze_enabled) - filesystems_freeze(); + filesystems_freeze(filesystem_freeze_enabled); trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); trace_suspend_resume(TPS("freeze_processes"), 0, false); From f6fdd77b3e0d519a2535a1e923558cd07d9acda9 Mon Sep 17 00:00:00 2001 From: Baojun Xu Date: Wed, 12 Nov 2025 17:26:09 +0800 Subject: [PATCH 1001/1075] ALSA: hda/tas2781: Correct the wrong project ID The project hardware ID should be ALC287_FIXUP_TXNW2781_I2C, not ALC287_FIXUP_TAS2781_I2C for HP Lampass projects. Fixes: 7a39c723b747 ("ALSA: hda/tas2781: Add new quirk for HP new projects") Signed-off-by: Baojun Xu Link: https://patch.msgid.link/20251112092609.15865-1-baojun.xu@ti.com Signed-off-by: Takashi Iwai --- sound/hda/codecs/realtek/alc269.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/hda/codecs/realtek/alc269.c b/sound/hda/codecs/realtek/alc269.c index a9698bf26887..269b6c1e3b6d 100644 --- a/sound/hda/codecs/realtek/alc269.c +++ b/sound/hda/codecs/realtek/alc269.c @@ -6700,9 +6700,9 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ed8, "HP Merino16", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8ed9, "HP Merino14W", ALC245_FIXUP_TAS2781_SPI_2), SND_PCI_QUIRK(0x103c, 0x8eda, "HP Merino16W", ALC245_FIXUP_TAS2781_SPI_2), - SND_PCI_QUIRK(0x103c, 0x8f40, "HP Lampas14", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x103c, 0x8f41, "HP Lampas16", ALC287_FIXUP_TAS2781_I2C), - SND_PCI_QUIRK(0x103c, 0x8f42, "HP LampasW14", ALC287_FIXUP_TAS2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f40, "HP Lampas14", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f41, "HP Lampas16", ALC287_FIXUP_TXNW2781_I2C), + SND_PCI_QUIRK(0x103c, 0x8f42, "HP LampasW14", ALC287_FIXUP_TXNW2781_I2C), SND_PCI_QUIRK(0x1043, 0x1032, "ASUS VivoBook X513EA", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1034, "ASUS GU605C", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), From 78f0e33cd6c939a555aa80dbed2fec6b333a7660 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 11 Nov 2025 06:28:15 +0000 Subject: [PATCH 1002/1075] fs/namespace: correctly handle errors returned by grab_requested_mnt_ns grab_requested_mnt_ns was changed to return error codes on failure, but its callers were not updated to check for error pointers, still checking only for a NULL return value. This commit updates the callers to use IS_ERR() or IS_ERR_OR_NULL() and PTR_ERR() to correctly check for and propagate errors. This also makes sure that the logic actually works and mount namespace file descriptors can be used to refere to mounts. Christian Brauner says: Rework the patch to be more ergonomic and in line with our overall error handling patterns. Fixes: 7b9d14af8777 ("fs: allow mount namespace fd") Cc: Christian Brauner Signed-off-by: Andrei Vagin Link: https://patch.msgid.link/20251111062815.2546189-1-avagin@google.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/namespace.c | 32 ++++++++++++++++---------------- include/uapi/linux/mount.h | 2 +- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index cc6e00e72437..2bad25709b2c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -141,7 +141,8 @@ static void mnt_ns_release(struct mnt_namespace *ns) kfree(ns); } } -DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T)) +DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, + if (!IS_ERR(_T)) mnt_ns_release(_T)) static void mnt_ns_release_rcu(struct rcu_head *rcu) { @@ -5726,7 +5727,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req, ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize); if (ret) return ret; - if (kreq->spare != 0) + if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id) return -EINVAL; /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET) @@ -5743,16 +5744,12 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq { struct mnt_namespace *mnt_ns; - if (kreq->mnt_ns_id && kreq->spare) - return ERR_PTR(-EINVAL); - - if (kreq->mnt_ns_id) - return lookup_mnt_ns(kreq->mnt_ns_id); - - if (kreq->spare) { + if (kreq->mnt_ns_id) { + mnt_ns = lookup_mnt_ns(kreq->mnt_ns_id); + } else if (kreq->mnt_ns_fd) { struct ns_common *ns; - CLASS(fd, f)(kreq->spare); + CLASS(fd, f)(kreq->mnt_ns_fd); if (fd_empty(f)) return ERR_PTR(-EBADF); @@ -5767,6 +5764,8 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq } else { mnt_ns = current->nsproxy->mnt_ns; } + if (!mnt_ns) + return ERR_PTR(-ENOENT); refcount_inc(&mnt_ns->passive); return mnt_ns; @@ -5791,8 +5790,8 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req, return ret; ns = grab_requested_mnt_ns(&kreq); - if (!ns) - return -ENOENT; + if (IS_ERR(ns)) + return PTR_ERR(ns); if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) && !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN)) @@ -5902,8 +5901,8 @@ static void __free_klistmount_free(const struct klistmount *kls) static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *kreq, size_t nr_mnt_ids) { - u64 last_mnt_id = kreq->param; + struct mnt_namespace *ns; /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET) @@ -5917,9 +5916,10 @@ static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req * if (!kls->kmnt_ids) return -ENOMEM; - kls->ns = grab_requested_mnt_ns(kreq); - if (!kls->ns) - return -ENOENT; + ns = grab_requested_mnt_ns(kreq); + if (IS_ERR(ns)) + return PTR_ERR(ns); + kls->ns = ns; kls->mnt_parent_id = kreq->mnt_id; return 0; diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index 7fa67c2031a5..5d3f8c9e3a62 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -197,7 +197,7 @@ struct statmount { */ struct mnt_id_req { __u32 size; - __u32 spare; + __u32 mnt_ns_fd; __u64 mnt_id; __u64 param; __u64 mnt_ns_id; From 3cd1548a278c7d6a9bdef1f1866e7cf66bfd3518 Mon Sep 17 00:00:00 2001 From: Mike Yuan Date: Sat, 8 Nov 2025 19:09:47 +0000 Subject: [PATCH 1003/1075] shmem: fix tmpfs reconfiguration (remount) when noswap is set In systemd we're trying to switch the internal credentials setup logic to new mount API [1], and I noticed fsconfig(FSCONFIG_CMD_RECONFIGURE) consistently fails on tmpfs with noswap option. This can be trivially reproduced with the following: ``` int fs_fd = fsopen("tmpfs", 0); fsconfig(fs_fd, FSCONFIG_SET_FLAG, "noswap", NULL, 0); fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); fsmount(fs_fd, 0, 0); fsconfig(fs_fd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0); <------ EINVAL ``` After some digging the culprit is shmem_reconfigure() rejecting !(ctx->seen & SHMEM_SEEN_NOSWAP) && sbinfo->noswap, which is bogus as ctx->seen serves as a mask for whether certain options are touched at all. On top of that, noswap option doesn't use fsparam_flag_no, hence it's not really possible to "reenable" swap to begin with. Drop the check and redundant SHMEM_SEEN_NOSWAP flag. [1] https://github.com/systemd/systemd/pull/39637 Fixes: 2c6efe9cf2d7 ("shmem: add support to ignore swap") Signed-off-by: Mike Yuan Link: https://patch.msgid.link/20251108190930.440685-1-me@yhndnzj.com Cc: Luis Chamberlain Cc: Christian Brauner Cc: Hugh Dickins Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner --- mm/shmem.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index b9081b817d28..1b976414d6fa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -131,8 +131,7 @@ struct shmem_options { #define SHMEM_SEEN_INODES 2 #define SHMEM_SEEN_HUGE 4 #define SHMEM_SEEN_INUMS 8 -#define SHMEM_SEEN_NOSWAP 16 -#define SHMEM_SEEN_QUOTA 32 +#define SHMEM_SEEN_QUOTA 16 }; #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -4677,7 +4676,6 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) "Turning off swap in unprivileged tmpfs mounts unsupported"); } ctx->noswap = true; - ctx->seen |= SHMEM_SEEN_NOSWAP; break; case Opt_quota: if (fc->user_ns != &init_user_ns) @@ -4827,14 +4825,15 @@ static int shmem_reconfigure(struct fs_context *fc) err = "Current inum too high to switch to 32-bit inums"; goto out; } - if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { + + /* + * "noswap" doesn't use fsparam_flag_no, i.e. there's no "swap" + * counterpart for (re-)enabling swap. + */ + if (ctx->noswap && !sbinfo->noswap) { err = "Cannot disable swap on remount"; goto out; } - if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { - err = "Cannot enable swap on remount if it was disabled on first mount"; - goto out; - } if (ctx->seen & SHMEM_SEEN_QUOTA && !sb_any_quota_loaded(fc->root->d_sb)) { From 12741624645e098b2234a5ae341045a97473caf1 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 5 Nov 2025 22:20:24 +0100 Subject: [PATCH 1004/1075] fs: add iput_not_last() Signed-off-by: Mateusz Guzik Link: https://patch.msgid.link/20251105212025.807549-1-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/inode.c | 12 ++++++++++++ include/linux/fs.h | 1 + 2 files changed, 13 insertions(+) diff --git a/fs/inode.c b/fs/inode.c index ec9339024ac3..cff1d3af0d57 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1967,6 +1967,18 @@ void iput(struct inode *inode) } EXPORT_SYMBOL(iput); +/** + * iput_not_last - put an inode assuming this is not the last reference + * @inode: inode to put + */ +void iput_not_last(struct inode *inode) +{ + VFS_BUG_ON_INODE(atomic_read(&inode->i_count) < 2, inode); + + WARN_ON(atomic_sub_return(1, &inode->i_count) == 0); +} +EXPORT_SYMBOL(iput_not_last); + #ifdef CONFIG_BLOCK /** * bmap - find a block number in a file diff --git a/include/linux/fs.h b/include/linux/fs.h index 249a1da8440e..dd3b57cfadee 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2824,6 +2824,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); +void iput_not_last(struct inode *); int inode_update_timestamps(struct inode *inode, int flags); int generic_update_time(struct inode *, int); From 56325e8c68c0724d626f665773a5005dcf44e329 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 5 Nov 2025 22:20:25 +0100 Subject: [PATCH 1005/1075] landlock: fix splats from iput() after it started calling might_sleep() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At this point it is guaranteed this is not the last reference. However, a recent addition of might_sleep() at top of iput() started generating false-positives as it was executing for all values. Remedy the problem by using the newly introduced iput_not_last(). Reported-by: syzbot+12479ae15958fc3f54ec@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/68d32659.a70a0220.4f78.0012.GAE@google.com/ Fixes: 2ef435a872ab ("fs: add might_sleep() annotation to iput() and more") Signed-off-by: Mateusz Guzik Link: https://patch.msgid.link/20251105212025.807549-2-mjguzik@gmail.com Reviewed-by: Mickaël Salaün Signed-off-by: Christian Brauner --- security/landlock/fs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 0bade2c5aa1d..d9c12b993fa7 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -1335,11 +1335,10 @@ static void hook_sb_delete(struct super_block *const sb) * At this point, we own the ihold() reference that was * originally set up by get_inode_object() and the * __iget() reference that we just set in this loop - * walk. Therefore the following call to iput() will - * not sleep nor drop the inode because there is now at - * least two references to it. + * walk. Therefore there are at least two references + * on the inode. */ - iput(inode); + iput_not_last(inode); } else { spin_unlock(&object->lock); rcu_read_unlock(); From 85592114ffda568b507bc2b04f5e9afbe7c13b62 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Wed, 12 Nov 2025 10:28:53 +0000 Subject: [PATCH 1006/1075] KVM: arm64: VHE: Compute fgt traps before activating them On VHE, the Fine Grain Traps registers are written to hardware in kvm_arch_vcpu_load()->..->__activate_traps_hfgxtr(), but the fgt array is computed later, in kvm_vcpu_load_fgt(). This can lead to zero being written to the FGT registers the first time a VCPU is loaded. Also, any changes to the fgt array will be visible only after the VCPU is scheduled out, and then back in, which is not the intended behaviour. Fix it by computing the fgt array just before the fgt traps are written to hardware. Fixes: fb10ddf35c1c ("KVM: arm64: Compute per-vCPU FGTs at vcpu_load()") Signed-off-by: Alexandru Elisei Reviewed-by: Oliver Upton Link: https://patch.msgid.link/20251112102853.47759-1-alexandru.elisei@arm.com Signed-off-by: Marc Zyngier --- arch/arm64/kvm/arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 870953b4a8a7..052bf0d4d0b0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -624,6 +624,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_timer_vcpu_load(vcpu); kvm_vgic_load(vcpu); kvm_vcpu_load_debug(vcpu); + kvm_vcpu_load_fgt(vcpu); if (has_vhe()) kvm_vcpu_load_vhe(vcpu); kvm_arch_vcpu_load_fp(vcpu); @@ -642,7 +643,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) vcpu->arch.hcr_el2 |= HCR_TWI; vcpu_set_pauth_traps(vcpu); - kvm_vcpu_load_fgt(vcpu); if (is_protected_kvm_enabled()) { kvm_call_hyp_nvhe(__pkvm_vcpu_load, From f2687d3cc9f905505d7b510c50970176115066a2 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 7 Nov 2025 14:41:41 +0200 Subject: [PATCH 1007/1075] drm/i915/dp_mst: Disable Panel Replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Disable Panel Replay on MST links until it's properly implemented. For instance the required VSC SDP is not programmed on MST and FEC is not enabled if Panel Replay is enabled. Fixes: 3257e55d3ea7 ("drm/i915/panelreplay: enable/disable panel replay") Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/15174 Cc: Jouni Högander Cc: Animesh Manna Cc: stable@vger.kernel.org # v6.8+ Reviewed-by: Jouni Högander Signed-off-by: Imre Deak Link: https://patch.msgid.link/20251107124141.911895-1-imre.deak@intel.com (cherry picked from commit e109f644b871df8440c886a69cdce971ed533088) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_psr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index d5e0a1e66944..4619237f1346 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -585,6 +585,10 @@ static void _panel_replay_init_dpcd(struct intel_dp *intel_dp) struct intel_display *display = to_intel_display(intel_dp); int ret; + /* TODO: Enable Panel Replay on MST once it's properly implemented. */ + if (intel_dp->mst_detect == DRM_DP_MST) + return; + ret = drm_dp_dpcd_read_data(&intel_dp->aux, DP_PANEL_REPLAY_CAP_SUPPORT, &intel_dp->pr_dpcd, sizeof(intel_dp->pr_dpcd)); if (ret < 0) From ac1499fcd40fe06479e9b933347b837ccabc2a40 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Tue, 11 Nov 2025 14:43:24 +0800 Subject: [PATCH 1008/1075] ipv4: route: Prevent rt_bind_exception() from rebinding stale fnhe The sit driver's packet transmission path calls: sit_tunnel_xmit() -> update_or_create_fnhe(), which lead to fnhe_remove_oldest() being called to delete entries exceeding FNHE_RECLAIM_DEPTH+random. The race window is between fnhe_remove_oldest() selecting fnheX for deletion and the subsequent kfree_rcu(). During this time, the concurrent path's __mkroute_output() -> find_exception() can fetch the soon-to-be-deleted fnheX, and rt_bind_exception() then binds it with a new dst using a dst_hold(). When the original fnheX is freed via RCU, the dst reference remains permanently leaked. CPU 0 CPU 1 __mkroute_output() find_exception() [fnheX] update_or_create_fnhe() fnhe_remove_oldest() [fnheX] rt_bind_exception() [bind dst] RCU callback [fnheX freed, dst leak] This issue manifests as a device reference count leak and a warning in dmesg when unregistering the net device: unregister_netdevice: waiting for sitX to become free. Usage count = N Ido Schimmel provided the simple test validation method [1]. The fix clears 'oldest->fnhe_daddr' before calling fnhe_flush_routes(). Since rt_bind_exception() checks this field, setting it to zero prevents the stale fnhe from being reused and bound to a new dst just before it is freed. [1] ip netns add ns1 ip -n ns1 link set dev lo up ip -n ns1 address add 192.0.2.1/32 dev lo ip -n ns1 link add name dummy1 up type dummy ip -n ns1 route add 192.0.2.2/32 dev dummy1 ip -n ns1 link add name gretap1 up arp off type gretap \ local 192.0.2.1 remote 192.0.2.2 ip -n ns1 route add 198.51.0.0/16 dev gretap1 taskset -c 0 ip netns exec ns1 mausezahn gretap1 \ -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q & taskset -c 2 ip netns exec ns1 mausezahn gretap1 \ -A 198.51.100.1 -B 198.51.0.0/16 -t udp -p 1000 -c 0 -q & sleep 10 ip netns pids ns1 | xargs kill ip netns del ns1 Cc: stable@vger.kernel.org Fixes: 67d6d681e15b ("ipv4: make exception cache less predictible") Signed-off-by: Chuang Wang Reviewed-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20251111064328.24440-1-nashuiliang@gmail.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6d27d3610c1c..b549d6a57307 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -607,6 +607,11 @@ static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) oldest_p = fnhe_p; } } + + /* Clear oldest->fnhe_daddr to prevent this fnhe from being + * rebound with new dsts in rt_bind_exception(). + */ + oldest->fnhe_daddr = 0; fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); From 7c44656ab3ea6f8429027ed14c23b314502e2541 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:24 -0800 Subject: [PATCH 1009/1075] vfio: selftests: add iova range query helpers VFIO selftests need to map IOVAs from legally accessible ranges, which could vary between hardware. Tests in vfio_dma_mapping_test.c are making excessively strong assumptions about which IOVAs can be mapped. Add vfio_iommu_iova_ranges(), which queries IOVA ranges from the IOMMUFD or VFIO container associated with the device. The queried ranges are normalized to IOMMUFD's iommu_iova_range representation so that handling of IOVA ranges up the stack can be implementation-agnostic. iommu_iova_range and vfio_iova_range are equivalent, so bias to using the new interface's struct. Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES. Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE. The underlying vfio_iommu_type1_info buffer-related functionality has been kept generic so the same helpers can be used to query other capability chain information, if needed. Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-1-7960244642c5@fb.com Signed-off-by: Alex Williamson --- .../selftests/vfio/lib/include/vfio_util.h | 8 +- .../selftests/vfio/lib/vfio_pci_device.c | 172 ++++++++++++++++++ 2 files changed, 179 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index 240409bf5f8a..ef8f06ef0c13 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -4,9 +4,12 @@ #include #include -#include + +#include +#include #include #include +#include #include "../../../kselftest.h" @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_ void vfio_pci_device_cleanup(struct vfio_pci_device *device); void vfio_pci_device_reset(struct vfio_pci_device *device); +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges); + int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region); int __vfio_pci_dma_unmap(struct vfio_pci_device *device, diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index a381fd253aa7..11749348f53f 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -29,6 +29,178 @@ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ } while (0) +static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz, + u32 *cap_offset) +{ + struct vfio_info_cap_header *hdr; + + if (!*cap_offset) + return NULL; + + VFIO_ASSERT_LT(*cap_offset, bufsz); + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr)); + + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset); + *cap_offset = hdr->next; + + return hdr; +} + +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info, + u16 cap_id) +{ + struct vfio_info_cap_header *hdr; + u32 cap_offset = info->cap_offset; + u32 max_depth; + u32 depth = 0; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) + return NULL; + + if (cap_offset) + VFIO_ASSERT_GE(cap_offset, sizeof(*info)); + + max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr); + + while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) { + depth++; + VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n"); + + if (hdr->id == cap_id) + return hdr; + } + + return NULL; +} + +/* Return buffer including capability chain, if present. Free with free() */ +static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device) +{ + struct vfio_iommu_type1_info *info; + + info = malloc(sizeof(*info)); + VFIO_ASSERT_NOT_NULL(info); + + *info = (struct vfio_iommu_type1_info) { + .argsz = sizeof(*info), + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + info = realloc(info, info->argsz); + VFIO_ASSERT_NOT_NULL(info); + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + return info; +} + +/* + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to + * report iommufd's iommu_iova_range. Free with free(). + */ +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_range; + struct vfio_iommu_type1_info *info; + struct vfio_info_cap_header *hdr; + struct iommu_iova_range *ranges = NULL; + + info = vfio_iommu_get_info(device); + hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + VFIO_ASSERT_NOT_NULL(hdr); + + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header); + VFIO_ASSERT_GT(cap_range->nr_iovas, 0); + + ranges = calloc(cap_range->nr_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + for (u32 i = 0; i < cap_range->nr_iovas; i++) { + ranges[i] = (struct iommu_iova_range){ + .start = cap_range->iova_ranges[i].start, + .last = cap_range->iova_ranges[i].end, + }; + } + + *nranges = cap_range->nr_iovas; + + free(info); + return ranges; +} + +/* Return iova ranges of the device's IOAS. Free with free() */ +static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + int ret; + + struct iommu_ioas_iova_ranges query = { + .size = sizeof(query), + .ioas_id = device->ioas_id, + }; + + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + VFIO_ASSERT_EQ(ret, -1); + VFIO_ASSERT_EQ(errno, EMSGSIZE); + VFIO_ASSERT_GT(query.num_iovas, 0); + + ranges = calloc(query.num_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + query.allowed_iovas = (uintptr_t)ranges; + + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + *nranges = query.num_iovas; + + return ranges; +} + +static int iova_range_comp(const void *a, const void *b) +{ + const struct iommu_iova_range *ra = a, *rb = b; + + if (ra->start < rb->start) + return -1; + + if (ra->start > rb->start) + return 1; + + return 0; +} + +/* Return sorted IOVA ranges of the device. Free with free(). */ +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + + if (device->iommufd) + ranges = iommufd_iova_ranges(device, nranges); + else + ranges = vfio_iommu_iova_ranges(device, nranges); + + if (!ranges) + return NULL; + + VFIO_ASSERT_GT(*nranges, 0); + + /* Sort and check that ranges are sane and non-overlapping */ + qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp); + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last); + + for (u32 i = 1; i < *nranges; i++) { + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last); + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start); + } + + return ranges; +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; From a77fa0b9222d2f23a764061a3be18e6bc738672e Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:25 -0800 Subject: [PATCH 1010/1075] vfio: selftests: fix map limit tests to use last available iova Use the newly available vfio_pci_iova_ranges() to determine the last legal IOVA, and use this as the basis for vfio_dma_map_limit_test tests. Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests") Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-2-7960244642c5@fb.com Signed-off-by: Alex Williamson --- .../selftests/vfio/vfio_dma_mapping_test.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index 4f1ea79a200c..e1374aab96bd 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -219,7 +221,10 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); FIXTURE_SETUP(vfio_dma_map_limit_test) { struct vfio_dma_region *region = &self->region; + struct iommu_iova_range *ranges; u64 region_size = getpagesize(); + iova_t last_iova; + u32 nranges; /* * Over-allocate mmap by double the size to provide enough backing vaddr @@ -232,8 +237,13 @@ FIXTURE_SETUP(vfio_dma_map_limit_test) MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ASSERT_NE(region->vaddr, MAP_FAILED); - /* One page prior to the end of address space */ - region->iova = ~(iova_t)0 & ~(region_size - 1); + ranges = vfio_pci_iova_ranges(self->device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + last_iova = ranges[nranges - 1].last; + free(ranges); + + /* One page prior to the last iova */ + region->iova = last_iova & ~(region_size - 1); region->size = region_size; } @@ -276,6 +286,7 @@ TEST_F(vfio_dma_map_limit_test, overflow) struct vfio_dma_region *region = &self->region; int rc; + region->iova = ~(iova_t)0 & ~(region->size - 1); region->size = self->mmap_size; rc = __vfio_pci_dma_map(self->device, region); From ce0e3c403e00e9e03e80aca6570bf936a44279e2 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:26 -0800 Subject: [PATCH 1011/1075] vfio: selftests: add iova allocator Add struct iova_allocator, which gives tests a convenient way to generate legally-accessible IOVAs to map. This allocator traverses the sorted available IOVA ranges linearly, requires power-of-two size allocations, and does not support freeing iova allocations. The assumption is that tests are not IOVA space-bounded, and will not need to recycle IOVAs. This is based on Alex Williamson's patch series for adding an IOVA allocator [1]. [1] https://lore.kernel.org/all/20251108212954.26477-1-alex@shazbot.org/ Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-3-7960244642c5@fb.com Signed-off-by: Alex Williamson --- .../selftests/vfio/lib/include/vfio_util.h | 11 +++ .../selftests/vfio/lib/vfio_pci_device.c | 74 ++++++++++++++++++- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index ef8f06ef0c13..69ec0c856481 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -188,6 +188,13 @@ struct vfio_pci_device { struct vfio_pci_driver driver; }; +struct iova_allocator { + struct iommu_iova_range *ranges; + u32 nranges; + u32 range_idx; + u64 range_offset; +}; + /* * Return the BDF string of the device that the test should use. * @@ -212,6 +219,10 @@ void vfio_pci_device_reset(struct vfio_pci_device *device); struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, u32 *nranges); +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device); +void iova_allocator_cleanup(struct iova_allocator *allocator); +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size); + int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region); int __vfio_pci_dma_unmap(struct vfio_pci_device *device, diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index 11749348f53f..b479a359da12 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -12,11 +12,12 @@ #include #include +#include #include #include +#include #include #include -#include #include "../../../kselftest.h" #include @@ -201,6 +202,77 @@ struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, return ranges; } +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device) +{ + struct iova_allocator *allocator; + struct iommu_iova_range *ranges; + u32 nranges; + + ranges = vfio_pci_iova_ranges(device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + + allocator = malloc(sizeof(*allocator)); + VFIO_ASSERT_NOT_NULL(allocator); + + *allocator = (struct iova_allocator){ + .ranges = ranges, + .nranges = nranges, + .range_idx = 0, + .range_offset = 0, + }; + + return allocator; +} + +void iova_allocator_cleanup(struct iova_allocator *allocator) +{ + free(allocator->ranges); + free(allocator); +} + +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size) +{ + VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n"); + VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n"); + + for (;;) { + struct iommu_iova_range *range; + iova_t iova, last; + + VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges, + "IOVA allocator out of space\n"); + + range = &allocator->ranges[allocator->range_idx]; + iova = range->start + allocator->range_offset; + + /* Check for sufficient space at the current offset */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + /* Align iova to size */ + iova = last & ~(size - 1); + + /* Check for sufficient space at the aligned iova */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + if (last == range->last) { + allocator->range_idx++; + allocator->range_offset = 0; + } else { + allocator->range_offset = last - range->start + 1; + } + + return iova; + +next_range: + allocator->range_idx++; + allocator->range_offset = 0; + } +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; From d323ad739666761646048fca587734f4ae64f2c8 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:27 -0800 Subject: [PATCH 1012/1075] vfio: selftests: replace iova=vaddr with allocated iovas vfio_dma_mapping_test and vfio_pci_driver_test currently use iova=vaddr as part of DMA mapping operations. However, not all IOMMUs support the same virtual address width as the processor. For instance, older Intel consumer platforms only support 39-bits of IOMMU address space. On such platforms, using the virtual address as the IOVA fails. Make the tests more robust by using iova_allocator to vend IOVAs, which queries legally accessible IOVAs from the underlying IOMMUFD or VFIO container. Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-4-7960244642c5@fb.com Signed-off-by: Alex Williamson --- tools/testing/selftests/vfio/vfio_dma_mapping_test.c | 5 ++++- tools/testing/selftests/vfio/vfio_pci_driver_test.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index e1374aab96bd..102603d4407d 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -95,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova, FIXTURE(vfio_dma_mapping_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; }; FIXTURE_VARIANT(vfio_dma_mapping_test) { @@ -119,10 +120,12 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | FIXTURE_SETUP(vfio_dma_mapping_test) { self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); } FIXTURE_TEARDOWN(vfio_dma_mapping_test) { + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } @@ -144,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) else ASSERT_NE(region.vaddr, MAP_FAILED); - region.iova = (u64)region.vaddr; + region.iova = iova_allocator_alloc(self->iova_allocator, size); region.size = size; vfio_pci_dma_map(self->device, ®ion); diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c index 2dbd70b7db62..f69eec8b928d 100644 --- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -19,6 +19,7 @@ static const char *device_bdf; } while (0) static void region_setup(struct vfio_pci_device *device, + struct iova_allocator *iova_allocator, struct vfio_dma_region *region, u64 size) { const int flags = MAP_SHARED | MAP_ANONYMOUS; @@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device, VFIO_ASSERT_NE(vaddr, MAP_FAILED); region->vaddr = vaddr; - region->iova = (u64)vaddr; + region->iova = iova_allocator_alloc(iova_allocator, size); region->size = size; vfio_pci_dma_map(device, region); @@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device, FIXTURE(vfio_pci_driver_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; struct vfio_dma_region memcpy_region; void *vaddr; int msi_fd; @@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test) struct vfio_pci_driver *driver; self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); driver = &self->device->driver; - region_setup(self->device, &self->memcpy_region, SZ_1G); - region_setup(self->device, &driver->region, SZ_2M); + region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G); + region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M); /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ - self->unmapped_iova = 8UL * SZ_1G; + self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G); vfio_pci_driver_init(self->device); self->msi_fd = self->device->msi_eventfds[driver->msi]; @@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test) region_teardown(self->device, &self->memcpy_region); region_teardown(self->device, &driver->region); + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } From 2d0e88f3fd1dcb37072d499c36162baf5b009d41 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Tue, 11 Nov 2025 12:15:29 -0700 Subject: [PATCH 1013/1075] io_uring/rsrc: don't use blk_rq_nr_phys_segments() as number of bvecs io_buffer_register_bvec() currently uses blk_rq_nr_phys_segments() as the number of bvecs in the request. However, bvecs may be split into multiple segments depending on the queue limits. Thus, the number of segments may overestimate the number of bvecs. For ublk devices, the only current users of io_buffer_register_bvec(), virt_boundary_mask, seg_boundary_mask, max_segments, and max_segment_size can all be set arbitrarily by the ublk server process. Set imu->nr_bvecs based on the number of bvecs the rq_for_each_bvec() loop actually yields. However, continue using blk_rq_nr_phys_segments() as an upper bound on the number of bvecs when allocating imu to avoid needing to iterate the bvecs a second time. Link: https://lore.kernel.org/io-uring/20251111191530.1268875-1-csander@purestorage.com/ Signed-off-by: Caleb Sander Mateos Fixes: 27cb27b6d5ea ("io_uring: add support for kernel registered bvecs") Reviewed-by: Ming Lei Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- io_uring/rsrc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 2602d76d5ff0..0010c4992490 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -943,8 +943,8 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, struct req_iterator rq_iter; struct io_mapped_ubuf *imu; struct io_rsrc_node *node; - struct bio_vec bv, *bvec; - u16 nr_bvecs; + struct bio_vec bv; + unsigned int nr_bvecs = 0; int ret = 0; io_ring_submit_lock(ctx, issue_flags); @@ -965,8 +965,11 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, goto unlock; } - nr_bvecs = blk_rq_nr_phys_segments(rq); - imu = io_alloc_imu(ctx, nr_bvecs); + /* + * blk_rq_nr_phys_segments() may overestimate the number of bvecs + * but avoids needing to iterate over the bvecs + */ + imu = io_alloc_imu(ctx, blk_rq_nr_phys_segments(rq)); if (!imu) { kfree(node); ret = -ENOMEM; @@ -977,16 +980,15 @@ int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, imu->len = blk_rq_bytes(rq); imu->acct_pages = 0; imu->folio_shift = PAGE_SHIFT; - imu->nr_bvecs = nr_bvecs; refcount_set(&imu->refs, 1); imu->release = release; imu->priv = rq; imu->is_kbuf = true; imu->dir = 1 << rq_data_dir(rq); - bvec = imu->bvec; rq_for_each_bvec(bv, rq, rq_iter) - *bvec++ = bv; + imu->bvec[nr_bvecs++] = bv; + imu->nr_bvecs = nr_bvecs; node->buf = imu; data->nodes[index] = node; From 5f02151c411dda46efcc5dc57b0845efcdcfc26d Mon Sep 17 00:00:00 2001 From: Zqiang Date: Wed, 12 Nov 2025 15:33:28 +0800 Subject: [PATCH 1014/1075] sched_ext: Fix unsafe locking in the scx_dump_state() For built with CONFIG_PREEMPT_RT=y kernels, the dump_lock will be converted sleepable spinlock and not disable-irq, so the following scenarios occur: inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage. irq_work/0/27 [HC0[0]:SC0[0]:HE1:SE1] takes: (&rq->__lock){?...}-{2:2}, at: raw_spin_rq_lock_nested+0x2b/0x40 {IN-HARDIRQ-W} state was registered at: lock_acquire+0x1e1/0x510 _raw_spin_lock_nested+0x42/0x80 raw_spin_rq_lock_nested+0x2b/0x40 sched_tick+0xae/0x7b0 update_process_times+0x14c/0x1b0 tick_periodic+0x62/0x1f0 tick_handle_periodic+0x48/0xf0 timer_interrupt+0x55/0x80 __handle_irq_event_percpu+0x20a/0x5c0 handle_irq_event_percpu+0x18/0xc0 handle_irq_event+0xb5/0x150 handle_level_irq+0x220/0x460 __common_interrupt+0xa2/0x1e0 common_interrupt+0xb0/0xd0 asm_common_interrupt+0x2b/0x40 _raw_spin_unlock_irqrestore+0x45/0x80 __setup_irq+0xc34/0x1a30 request_threaded_irq+0x214/0x2f0 hpet_time_init+0x3e/0x60 x86_late_time_init+0x5b/0xb0 start_kernel+0x308/0x410 x86_64_start_reservations+0x1c/0x30 x86_64_start_kernel+0x96/0xa0 common_startup_64+0x13e/0x148 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&rq->__lock); lock(&rq->__lock); *** DEADLOCK *** stack backtrace: CPU: 0 UID: 0 PID: 27 Comm: irq_work/0 Call Trace: dump_stack_lvl+0x8c/0xd0 dump_stack+0x14/0x20 print_usage_bug+0x42e/0x690 mark_lock.part.44+0x867/0xa70 ? __pfx_mark_lock.part.44+0x10/0x10 ? string_nocheck+0x19c/0x310 ? number+0x739/0x9f0 ? __pfx_string_nocheck+0x10/0x10 ? __pfx_check_pointer+0x10/0x10 ? kvm_sched_clock_read+0x15/0x30 ? sched_clock_noinstr+0xd/0x20 ? local_clock_noinstr+0x1c/0xe0 __lock_acquire+0xc4b/0x62b0 ? __pfx_format_decode+0x10/0x10 ? __pfx_string+0x10/0x10 ? __pfx___lock_acquire+0x10/0x10 ? __pfx_vsnprintf+0x10/0x10 lock_acquire+0x1e1/0x510 ? raw_spin_rq_lock_nested+0x2b/0x40 ? __pfx_lock_acquire+0x10/0x10 ? dump_line+0x12e/0x270 ? raw_spin_rq_lock_nested+0x20/0x40 _raw_spin_lock_nested+0x42/0x80 ? raw_spin_rq_lock_nested+0x2b/0x40 raw_spin_rq_lock_nested+0x2b/0x40 scx_dump_state+0x3b3/0x1270 ? finish_task_switch+0x27e/0x840 scx_ops_error_irq_workfn+0x67/0x80 irq_work_single+0x113/0x260 irq_work_run_list.part.3+0x44/0x70 run_irq_workd+0x6b/0x90 ? __pfx_run_irq_workd+0x10/0x10 smpboot_thread_fn+0x529/0x870 ? __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x305/0x3f0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x40/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 This commit therefore use rq_lock_irqsave/irqrestore() to replace rq_lock/unlock() in the scx_dump_state(). Fixes: 07814a9439a3 ("sched_ext: Print debug dump after an error exit") Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1a019a7728fb..184d562bda5e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4276,7 +4276,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) size_t avail, used; bool idle; - rq_lock(rq, &rf); + rq_lock_irqsave(rq, &rf); idle = list_empty(&rq->scx.runnable_list) && rq->curr->sched_class == &idle_sched_class; @@ -4345,7 +4345,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len) list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node) scx_dump_task(&s, &dctx, p, ' '); next: - rq_unlock(rq, &rf); + rq_unlock_irqrestore(rq, &rf); } dump_newline(&s); From 4b747cc628d8f500d56cf1338280eacc66362ff3 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 10 Nov 2025 17:08:40 -0800 Subject: [PATCH 1015/1075] cpufreq: intel_pstate: Check IDA only before MSR_IA32_PERF_CTL writes Commit ac4e04d9e378 ("cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode") introduced a check for feature X86_FEATURE_IDA to verify turbo mode support. Although this is the correct way to check for turbo mode support, it causes issues on some platforms that disable turbo during OS boot, but enable it later [1]. Before adding this feature check, users were able to get turbo mode frequencies by writing 0 to /sys/devices/system/cpu/intel_pstate/no_turbo post-boot. To restore the old behavior on the affected systems while still addressing the unchecked MSR issue on some Skylake-X systems, check X86_FEATURE_IDA only immediately before updates of MSR_IA32_PERF_CTL that may involve setting the Turbo Engage Bit (bit 32). Fixes: ac4e04d9e378 ("cpufreq: intel_pstate: Unchecked MSR aceess in legacy mode") Reported-by: Aaron Rainbolt Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2122531 [1] Tested-by: Aaron Rainbolt Signed-off-by: Srinivas Pandruvada [ rjw: Subject adjustment, changelog edits ] Link: https://patch.msgid.link/20251111010840.141490-1-srinivas.pandruvada@linux.intel.com Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 38897bb14a2c..492a10f1bdbf 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -603,9 +603,6 @@ static bool turbo_is_disabled(void) { u64 misc_en; - if (!cpu_feature_enabled(X86_FEATURE_IDA)) - return true; - rdmsrq(MSR_IA32_MISC_ENABLE, misc_en); return !!(misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); @@ -2106,7 +2103,8 @@ static u64 atom_get_val(struct cpudata *cpudata, int pstate) u32 vid; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) && + cpu_feature_enabled(X86_FEATURE_IDA)) val |= (u64)1 << 32; vid_fp = cpudata->vid.min + mul_fp( @@ -2271,7 +2269,8 @@ static u64 core_get_val(struct cpudata *cpudata, int pstate) u64 val; val = (u64)pstate << 8; - if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled)) + if (READ_ONCE(global.no_turbo) && !READ_ONCE(global.turbo_disabled) && + cpu_feature_enabled(X86_FEATURE_IDA)) val |= (u64)1 << 32; return val; From c87488a12393a23f8a1b9850b989b386c58cac3f Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Wed, 12 Nov 2025 08:42:02 -1000 Subject: [PATCH 1016/1075] sched/ext: convert scx_tasks_lock to raw spinlock Update scx_task_locks so that it's safe to lock/unlock in a non-sleepable context in PREEMPT_RT kernels. scx_task_locks is (non-raw) spinlock used to protect the list of tasks under SCX. This list is updated during from finish_task_switch(), which cannot sleep. Regular spinlocks can be locked in such a context in non-RT kernels, but are sleepable under when CONFIG_PREEMPT_RT=y. Convert scx_task_locks into a raw spinlock, which is not sleepable even on RT kernels. Sample backtrace: dump_stack_lvl+0x83/0xa0 __might_resched+0x14a/0x200 rt_spin_lock+0x61/0x1c0 ? sched_ext_dead+0x2d/0xf0 ? lock_release+0xc6/0x280 sched_ext_dead+0x2d/0xf0 ? srso_alias_return_thunk+0x5/0xfbef5 finish_task_switch.isra.0+0x254/0x360 __schedule+0x584/0x11d0 ? srso_alias_return_thunk+0x5/0xfbef5 ? srso_alias_return_thunk+0x5/0xfbef5 ? tick_nohz_idle_exit+0x7e/0x120 schedule_idle+0x23/0x40 cpu_startup_entry+0x29/0x30 start_secondary+0xf8/0x100 common_startup_64+0x13e/0x148 Signed-off-by: Emil Tsalapatis Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 184d562bda5e..03d05ea20006 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -25,7 +25,7 @@ static struct scx_sched __rcu *scx_root; * guarantee system safety. Maintain a dedicated task list which contains every * task between its fork and eventual free. */ -static DEFINE_SPINLOCK(scx_tasks_lock); +static DEFINE_RAW_SPINLOCK(scx_tasks_lock); static LIST_HEAD(scx_tasks); /* ops enable/disable */ @@ -476,7 +476,7 @@ static void scx_task_iter_start(struct scx_task_iter *iter) BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS & ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1)); - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR }; list_add(&iter->cursor.tasks_node, &scx_tasks); @@ -507,14 +507,14 @@ static void scx_task_iter_unlock(struct scx_task_iter *iter) __scx_task_iter_rq_unlock(iter); if (iter->list_locked) { iter->list_locked = false; - spin_unlock_irq(&scx_tasks_lock); + raw_spin_unlock_irq(&scx_tasks_lock); } } static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter) { if (!iter->list_locked) { - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); iter->list_locked = true; } } @@ -2940,9 +2940,9 @@ void scx_post_fork(struct task_struct *p) } } - spin_lock_irq(&scx_tasks_lock); + raw_spin_lock_irq(&scx_tasks_lock); list_add_tail(&p->scx.tasks_node, &scx_tasks); - spin_unlock_irq(&scx_tasks_lock); + raw_spin_unlock_irq(&scx_tasks_lock); percpu_up_read(&scx_fork_rwsem); } @@ -2966,9 +2966,9 @@ void sched_ext_free(struct task_struct *p) { unsigned long flags; - spin_lock_irqsave(&scx_tasks_lock, flags); + raw_spin_lock_irqsave(&scx_tasks_lock, flags); list_del_init(&p->scx.tasks_node); - spin_unlock_irqrestore(&scx_tasks_lock, flags); + raw_spin_unlock_irqrestore(&scx_tasks_lock, flags); /* * @p is off scx_tasks and wholly ours. scx_enable()'s READY -> ENABLED From 9efb297c520f392ab04bc45544a03770c98c3798 Mon Sep 17 00:00:00 2001 From: Gopi Krishna Menon Date: Sat, 25 Oct 2025 01:50:40 +0530 Subject: [PATCH 1017/1075] hwmon: (gpd-fan) Fix compilation error in non-ACPI builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building gpd-fan driver without CONFIG_ACPI results in the following build errors: drivers/hwmon/gpd-fan.c: In function ‘gpd_ecram_read’: drivers/hwmon/gpd-fan.c:228:9: error: implicit declaration of function ‘outb’ [-Werror=implicit-function-declaration] 228 | outb(0x2E, addr_port); | ^~~~ drivers/hwmon/gpd-fan.c:241:16: error: implicit declaration of function ‘inb’ [-Werror=implicit-function-declaration] 241 | *val = inb(data_port); The definitions for inb() and outb() come from (specifically through ), which is implicitly included via . When CONFIG_ACPI is not set, is not included resulting in to be omitted as well. Since the driver does not depend on ACPI, remove and add directly to fix the compilation errors. Signed-off-by: Gopi Krishna Menon Link: https://lore.kernel.org/r/20251024202042.752160-1-krishnagopi487@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index 321794807e8d..48c84e3e9939 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -12,9 +12,9 @@ * Copyright (c) 2024 Cryolitia PukNgae */ -#include #include #include +#include #include #include #include From c55a8e24cd129b6d8fed20e3d63c10c2263e2fc8 Mon Sep 17 00:00:00 2001 From: Cryolitia PukNgae Date: Thu, 30 Oct 2025 22:30:06 +0800 Subject: [PATCH 1018/1075] hwmon: (gpd-fan) initialize EC on driver load for Win 4 The original implement will re-init the EC when it reports a zero value, and it's a workaround for the black box buggy firmware. Now a contributer test and report that, the bug is that, the firmware won't initialize the EC on boot, so the EC ramains in unusable status. And it won't need to re-init it during runtime. The original implement is not perfect, any write command will be ignored until we first read it. Just re-init it unconditionally when the driver load could work. Fixes: 0ab88e239439 ("hwmon: add GPD devices sensor driver") Co-developed-by: kylon <3252255+kylon@users.noreply.github.com> Signed-off-by: kylon <3252255+kylon@users.noreply.github.com> Link: https://github.com/Cryolitia/gpd-fan-driver/pull/20 Signed-off-by: Cryolitia PukNgae Link: https://lore.kernel.org/r/20251030-win4-v1-1-c374dcb86985@uniontech.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 52 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index 48c84e3e9939..f81c3bc422f4 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -276,31 +276,6 @@ static int gpd_generic_read_rpm(void) return (u16)high << 8 | low; } -static void gpd_win4_init_ec(void) -{ - u8 chip_id, chip_ver; - - gpd_ecram_read(0x2000, &chip_id); - - if (chip_id == 0x55) { - gpd_ecram_read(0x1060, &chip_ver); - gpd_ecram_write(0x1060, chip_ver | 0x80); - } -} - -static int gpd_win4_read_rpm(void) -{ - int ret; - - ret = gpd_generic_read_rpm(); - - if (ret == 0) - // Re-init EC when speed is 0 - gpd_win4_init_ec(); - - return ret; -} - static int gpd_wm2_read_rpm(void) { for (u16 pwm_ctr_offset = GPD_PWM_CTR_OFFSET; @@ -320,11 +295,10 @@ static int gpd_wm2_read_rpm(void) static int gpd_read_rpm(void) { switch (gpd_driver_priv.drvdata->board) { + case win4_6800u: case win_mini: case duo: return gpd_generic_read_rpm(); - case win4_6800u: - return gpd_win4_read_rpm(); case win_max_2: return gpd_wm2_read_rpm(); } @@ -607,6 +581,28 @@ static struct hwmon_chip_info gpd_fan_chip_info = { .info = gpd_fan_hwmon_channel_info }; +static void gpd_win4_init_ec(void) +{ + u8 chip_id, chip_ver; + + gpd_ecram_read(0x2000, &chip_id); + + if (chip_id == 0x55) { + gpd_ecram_read(0x1060, &chip_ver); + gpd_ecram_write(0x1060, chip_ver | 0x80); + } +} + +static void gpd_init_ec(void) +{ + // The buggy firmware won't initialize EC properly on boot. + // Before its initialization, reading RPM will always return 0, + // and writing PWM will have no effect. + // Initialize it manually on driver load. + if (gpd_driver_priv.drvdata->board == win4_6800u) + gpd_win4_init_ec(); +} + static int gpd_fan_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -634,6 +630,8 @@ static int gpd_fan_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(hwdev), "Failed to register hwmon device\n"); + gpd_init_ec(); + return 0; } From 214291cbaaceeb28debd773336642b1fca393ae0 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 5 Nov 2025 16:51:15 -0700 Subject: [PATCH 1019/1075] acpi/hmat: Fix lockdep warning for hmem_register_resource() The following lockdep splat was observed while kernel auto-online a CXL memory region: ====================================================== WARNING: possible circular locking dependency detected 6.17.0djtest+ #53 Tainted: G W ------------------------------------------------------ systemd-udevd/3334 is trying to acquire lock: ffffffff90346188 (hmem_resource_lock){+.+.}-{4:4}, at: hmem_register_resource+0x31/0x50 but task is already holding lock: ffffffff90338890 ((node_chain).rwsem){++++}-{4:4}, at: blocking_notifier_call_chain+0x2e/0x70 which lock already depends on the new lock. [..] Chain exists of: hmem_resource_lock --> mem_hotplug_lock --> (node_chain).rwsem Possible unsafe locking scenario: CPU0 CPU1 ---- ---- rlock((node_chain).rwsem); lock(mem_hotplug_lock); lock((node_chain).rwsem); lock(hmem_resource_lock); The lock ordering can cause potential deadlock. There are instances where hmem_resource_lock is taken after (node_chain).rwsem, and vice versa. Split out the target update section of hmat_register_target() so that hmat_callback() only envokes that section instead of attempt to register hmem devices that it does not need to. [ dj: Fix up comment to be closer to 80cols. (Jonathan) ] Fixes: cf8741ac57ed ("ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device") Reviewed-by: Jonathan Cameron Tested-by: Smita Koralahalli Reviewed-by: Smita Koralahalli Reviewed-by: Dan Williams Link: https://patch.msgid.link/20251105235115.85062-3-dave.jiang@intel.com Signed-off-by: Dave Jiang --- drivers/acpi/numa/hmat.c | 46 ++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 5a36d57289b4..11e4483685c9 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -874,10 +874,32 @@ static void hmat_register_target_devices(struct memory_target *target) } } -static void hmat_register_target(struct memory_target *target) +static void hmat_hotplug_target(struct memory_target *target) { int nid = pxm_to_node(target->memory_pxm); + /* + * Skip offline nodes. This can happen when memory marked EFI_MEMORY_SP, + * "specific purpose", is applied to all the memory in a proximity + * domain leading to * the node being marked offline / unplugged, or if + * memory-only "hotplug" node is offline. + */ + if (nid == NUMA_NO_NODE || !node_online(nid)) + return; + + guard(mutex)(&target_lock); + if (target->registered) + return; + + hmat_register_target_initiators(target); + hmat_register_target_cache(target); + hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL); + hmat_register_target_perf(target, ACCESS_COORDINATE_CPU); + target->registered = true; +} + +static void hmat_register_target(struct memory_target *target) +{ /* * Devices may belong to either an offline or online * node, so unconditionally add them. @@ -895,25 +917,7 @@ static void hmat_register_target(struct memory_target *target) } mutex_unlock(&target_lock); - /* - * Skip offline nodes. This can happen when memory - * marked EFI_MEMORY_SP, "specific purpose", is applied - * to all the memory in a proximity domain leading to - * the node being marked offline / unplugged, or if - * memory-only "hotplug" node is offline. - */ - if (nid == NUMA_NO_NODE || !node_online(nid)) - return; - - mutex_lock(&target_lock); - if (!target->registered) { - hmat_register_target_initiators(target); - hmat_register_target_cache(target); - hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL); - hmat_register_target_perf(target, ACCESS_COORDINATE_CPU); - target->registered = true; - } - mutex_unlock(&target_lock); + hmat_hotplug_target(target); } static void hmat_register_targets(void) @@ -939,7 +943,7 @@ static int hmat_callback(struct notifier_block *self, if (!target) return NOTIFY_OK; - hmat_register_target(target); + hmat_hotplug_target(target); return NOTIFY_OK; } From 360b3730f8eab6c4467c6cca4cb0e30902174a63 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Wed, 12 Nov 2025 14:57:09 +0800 Subject: [PATCH 1020/1075] ASoC: rsnd: fix OF node reference leak in rsnd_ssiu_probe() rsnd_ssiu_probe() leaks an OF node reference obtained by rsnd_ssiu_of_node(). The node reference is acquired but never released across all return paths. Fix it by declaring the device node with the __free(device_node) cleanup construct to ensure automatic release when the variable goes out of scope. Fixes: 4e7788fb8018 ("ASoC: rsnd: add SSIU BUSIF support") Signed-off-by: Haotian Zhang Acked-by: Kuninori Morimoto Link: https://patch.msgid.link/20251112065709.1522-1-vulab@iscas.ac.cn Signed-off-by: Mark Brown --- sound/soc/renesas/rcar/ssiu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/renesas/rcar/ssiu.c b/sound/soc/renesas/rcar/ssiu.c index faf351126d57..244fb833292a 100644 --- a/sound/soc/renesas/rcar/ssiu.c +++ b/sound/soc/renesas/rcar/ssiu.c @@ -509,7 +509,7 @@ void rsnd_parse_connect_ssiu(struct rsnd_dai *rdai, int rsnd_ssiu_probe(struct rsnd_priv *priv) { struct device *dev = rsnd_priv_to_dev(priv); - struct device_node *node; + struct device_node *node __free(device_node) = rsnd_ssiu_of_node(priv); struct rsnd_ssiu *ssiu; struct rsnd_mod_ops *ops; const int *list = NULL; @@ -522,7 +522,6 @@ int rsnd_ssiu_probe(struct rsnd_priv *priv) * see * rsnd_ssiu_bufsif_to_id() */ - node = rsnd_ssiu_of_node(priv); if (node) nr = rsnd_node_count(priv, node, SSIU_NAME); else From 4495bffd86ba0fdabfaef0c41d12f68ec2a1e05b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 10 Nov 2025 16:22:25 -0600 Subject: [PATCH 1021/1075] PCI/ASPM: Cache L0s/L1 Supported so advertised link states can be overridden Defective devices sometimes advertise support for ASPM L0s or L1 states even if they don't work correctly. Cache the L0s Supported and L1 Supported bits early in enumeration so HEADER quirks can override the ASPM states advertised in Link Capabilities before pcie_aspm_cap_init() enables ASPM. Signed-off-by: Bjorn Helgaas Tested-by: Shawn Lin Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20251110222929.2140564-2-helgaas@kernel.org --- drivers/pci/pcie/aspm.c | 12 ++++-------- drivers/pci/probe.c | 7 +++++++ include/linux/pci.h | 2 ++ 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 7cc8281e7011..15d50c089070 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -830,7 +830,6 @@ static void pcie_aspm_override_default_link_state(struct pcie_link_state *link) static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) { struct pci_dev *child = link->downstream, *parent = link->pdev; - u32 parent_lnkcap, child_lnkcap; u16 parent_lnkctl, child_lnkctl; struct pci_bus *linkbus = parent->subordinate; @@ -845,9 +844,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * If ASPM not supported, don't mess with the clocks and link, * bail out now. */ - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); - pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); - if (!(parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPMS)) + if (!(parent->aspm_l0s_support && child->aspm_l0s_support) && + !(parent->aspm_l1_support && child->aspm_l1_support)) return; /* Configure common clock before checking latencies */ @@ -859,8 +857,6 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * read-only Link Capabilities may change depending on common clock * configuration (PCIe r5.0, sec 7.5.3.6). */ - pcie_capability_read_dword(parent, PCI_EXP_LNKCAP, &parent_lnkcap); - pcie_capability_read_dword(child, PCI_EXP_LNKCAP, &child_lnkcap); pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &parent_lnkctl); pcie_capability_read_word(child, PCI_EXP_LNKCTL, &child_lnkctl); @@ -880,7 +876,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) * given link unless components on both sides of the link each * support L0s. */ - if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) + if (parent->aspm_l0s_support && child->aspm_l0s_support) link->aspm_support |= PCIE_LINK_STATE_L0S; if (child_lnkctl & PCI_EXP_LNKCTL_ASPM_L0S) @@ -889,7 +885,7 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) link->aspm_enabled |= PCIE_LINK_STATE_L0S_DW; /* Setup L1 state */ - if (parent_lnkcap & child_lnkcap & PCI_EXP_LNKCAP_ASPM_L1) + if (parent->aspm_l1_support && child->aspm_l1_support) link->aspm_support |= PCIE_LINK_STATE_L1; if (parent_lnkctl & child_lnkctl & PCI_EXP_LNKCTL_ASPM_L1) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index c83e75a0ec12..de72ceaea285 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1663,6 +1663,13 @@ void set_pcie_port_type(struct pci_dev *pdev) if (reg32 & PCI_EXP_LNKCAP_DLLLARC) pdev->link_active_reporting = 1; +#ifdef CONFIG_PCIEASPM + if (reg32 & PCI_EXP_LNKCAP_ASPM_L0S) + pdev->aspm_l0s_support = 1; + if (reg32 & PCI_EXP_LNKCAP_ASPM_L1) + pdev->aspm_l1_support = 1; +#endif + parent = pci_upstream_bridge(pdev); if (!parent) return; diff --git a/include/linux/pci.h b/include/linux/pci.h index d1fdf81fbe1e..bf97d49c23cf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -412,6 +412,8 @@ struct pci_dev { u16 l1ss; /* L1SS Capability pointer */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + unsigned int aspm_l0s_support:1; /* ASPM L0s support */ + unsigned int aspm_l1_support:1; /* ASPM L1 support */ unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif From 575b98e39d817537afdc6c39d35c1de484a64d42 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 10 Nov 2025 16:22:26 -0600 Subject: [PATCH 1022/1075] PCI/ASPM: Add pcie_aspm_remove_cap() to override advertised link states Add pcie_aspm_remove_cap(). A quirk can use this to prevent use of ASPM L0s or L1 link states, even if the device advertised support for them. Signed-off-by: Bjorn Helgaas Tested-by: Shawn Lin Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20251110222929.2140564-3-helgaas@kernel.org --- drivers/pci/pci.h | 2 ++ drivers/pci/pcie/aspm.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 4492b809094b..36f8c0985430 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -958,6 +958,7 @@ void pci_save_aspm_l1ss_state(struct pci_dev *dev); void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #ifdef CONFIG_PCIEASPM +void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap); void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); @@ -965,6 +966,7 @@ void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pci_configure_ltr(struct pci_dev *pdev); void pci_bridge_reconfigure_ltr(struct pci_dev *pdev); #else +static inline void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap) { } static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 15d50c089070..f61d98897503 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1542,6 +1542,19 @@ int pci_enable_link_state_locked(struct pci_dev *pdev, int state) } EXPORT_SYMBOL(pci_enable_link_state_locked); +void pcie_aspm_remove_cap(struct pci_dev *pdev, u32 lnkcap) +{ + if (lnkcap & PCI_EXP_LNKCAP_ASPM_L0S) + pdev->aspm_l0s_support = 0; + if (lnkcap & PCI_EXP_LNKCAP_ASPM_L1) + pdev->aspm_l1_support = 0; + + pci_info(pdev, "ASPM: Link Capabilities%s%s treated as unsupported to avoid device defect\n", + lnkcap & PCI_EXP_LNKCAP_ASPM_L0S ? " L0s" : "", + lnkcap & PCI_EXP_LNKCAP_ASPM_L1 ? " L1" : ""); + +} + static int pcie_aspm_set_policy(const char *val, const struct kernel_param *kp) { From 30579eebba6ae52dc7441479aec9dd8d782256d3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 10 Nov 2025 16:22:27 -0600 Subject: [PATCH 1023/1075] PCI/ASPM: Convert quirks to override advertised link states Existing quirks to disable ASPM L0s and L1 use pci_disable_link_state(), which disables ASPM states and prevents their use in the future. But since they are FINAL quirks, they happen after ASPM has already been enabled. Here's a typical call path: pci_host_probe pci_scan_root_bus_bridge pci_scan_child_bus pci_scan_slot pci_scan_single_device pci_device_add pci_fixup_device(pci_fixup_header) # HEADER quirks pcie_aspm_init_link_state pcie_config_aspm_path pcie_config_aspm_link pcie_config_aspm_dev # ASPM may be enabled pci_bus_add_devices pci_bus_add_devices pci_fixup_device(pci_fixup_final) # FINAL quirks quirk_disable_aspm_l0s pci_disable_link_state(dev, PCIE_LINK_STATE_L0S) Sometimes enabling ASPM can make the link non-functional, so if we know ASPM is broken on a device, we shouldn't enable it at all, even temporarily. Convert the existing quirks to use pcie_aspm_remove_cap() instead, which overrides the ASPM Support advertised in PCIe Link Capabilities, and make them HEADER quirks so they run before pcie_aspm_init_link_state() has a chance to enable ASPM. Signed-off-by: Bjorn Helgaas Tested-by: Shawn Lin Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20251110222929.2140564-4-helgaas@kernel.org --- drivers/pci/quirks.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 214ed060ca1b..922c77c627a1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2494,28 +2494,27 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, */ static void quirk_disable_aspm_l0s(struct pci_dev *dev) { - pci_info(dev, "Disabling L0s\n"); - pci_disable_link_state(dev, PCIE_LINK_STATE_L0S); + pcie_aspm_remove_cap(dev, PCI_EXP_LNKCAP_ASPM_L0S); } -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10a7, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10a9, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10b6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c7, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10c8, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10d6, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10db, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10dd, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10e1, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10ec, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10f1, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x10f4, quirk_disable_aspm_l0s); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1508, quirk_disable_aspm_l0s); static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) { - pci_info(dev, "Disabling ASPM L0s/L1\n"); - pci_disable_link_state(dev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + pcie_aspm_remove_cap(dev, + PCI_EXP_LNKCAP_ASPM_L0S | PCI_EXP_LNKCAP_ASPM_L1); } /* @@ -2523,7 +2522,7 @@ static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) * upstream PCIe root port when ASPM is enabled. At least L0s mode is affected; * disable both L0s and L1 for now to be safe. */ -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); /* * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain From 5b40a5080c39933e7ce28bcb63cd4c1818d6c873 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 10 Nov 2025 16:22:28 -0600 Subject: [PATCH 1024/1075] PCI/ASPM: Avoid L0s and L1 on Freescale [1957:0451] Root Ports Christian reported that f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") broke booting on the A-EON X5000. Override the L0s and L1 Support advertised in Link Capabilities by the X5000 Root Ports ([1957:0451]) so we don't try to enable those states. Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") Fixes: df5192d9bb0e ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms") Reported-by: Christian Zigotzky Link: https://lore.kernel.org/r/db5c95a1-cf3e-46f9-8045-a1b04908051a@xenosoft.de Signed-off-by: Bjorn Helgaas Tested-by: Shawn Lin Reviewed-by: Lukas Wunner Link: https://patch.msgid.link/20251110222929.2140564-5-helgaas@kernel.org --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 922c77c627a1..b94264cd3833 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2523,6 +2523,7 @@ static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) * disable both L0s and L1 for now to be safe. */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, 0x0451, quirk_disable_aspm_l0s_l1); /* * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain From 823576c894d73255d35c0d0dabbb6ffecf1f2667 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 12 Nov 2025 18:36:24 -0600 Subject: [PATCH 1025/1075] PCI/ASPM: Avoid L0s and L1 on PA Semi [1959:a002] Root Ports Christian reported that f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") broke booting on the A-EON AmigaOne X1000. Override the L0s and L1 Support advertised in Link Capabilities by the X1000 Root Ports ([1959:a002]) so we don't try to enable those states. Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") Fixes: df5192d9bb0e ("PCI/ASPM: Enable only L0s and L1 for devicetree platforms") Reported-by: Christian Zigotzky Link: https://lore.kernel.org/r/a41d2ca1-fcd9-c416-b111-a958e92e94bf@xenosoft.de Signed-off-by: Bjorn Helgaas --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b94264cd3833..90f6abdb77f4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2524,6 +2524,7 @@ static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, 0x0451, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PASEMI, 0xa002, quirk_disable_aspm_l0s_l1); /* * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain From 0eff2eaa5322b5b141ff5d5ded26fac4a52b5f7b Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Tue, 11 Nov 2025 17:08:28 +0800 Subject: [PATCH 1026/1075] virtio-net: fix incorrect flags recording in big mode The purpose of commit 703eec1b2422 ("virtio_net: fixing XDP for fully checksummed packets handling") is to record the flags in advance, as their value may be overwritten in the XDP case. However, the flags recorded under big mode are incorrect, because in big mode, the passed buf does not point to the rx buffer, but rather to the page of the submitted buffer. This commit fixes this issue. For the small mode, the commit c11a49d58ad2 ("virtio_net: Fix mismatched buf address when unmapping for small packets") fixed it. Tested-by: Alyssa Ross Fixes: 703eec1b2422 ("virtio_net: fixing XDP for fully checksummed packets handling") Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Acked-by: Michael S. Tsirkin Link: https://patch.msgid.link/20251111090828.23186-1-xuanzhuo@linux.alibaba.com Signed-off-by: Paolo Abeni --- drivers/net/virtio_net.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 8855a994e12b..0369dda5ed60 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2631,22 +2631,28 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; } - /* 1. Save the flags early, as the XDP program might overwrite them. + /* About the flags below: + * 1. Save the flags early, as the XDP program might overwrite them. * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID * stay valid after XDP processing. * 2. XDP doesn't work with partially checksummed packets (refer to * virtnet_xdp_set()), so packets marked as * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. */ - flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; - if (vi->mergeable_rx_bufs) + if (vi->mergeable_rx_bufs) { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); - else if (vi->big_packets) + } else if (vi->big_packets) { + void *p = page_address((struct page *)buf); + + flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; skb = receive_big(dev, vi, rq, buf, len, stats); - else + } else { + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); + } if (unlikely(!skb)) return; From 921b3f59b7b00cd7067ab775b0e0ca4eca436c2f Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 12 Nov 2025 18:53:18 -0600 Subject: [PATCH 1027/1075] PCI/ASPM: Avoid L0s and L1 on Hi1105 [19e5:1105] Wi-Fi This Wi-Fi advertises the L0s and L1 capabilities but actually it doesn't support them. This is confirmed by HiSilicon team in actual productization. Signed-off-by: Shawn Lin Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/1762916319-139532-1-git-send-email-shawn.lin@rock-chips.com --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 90f6abdb77f4..b9c252aa6fe0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2525,6 +2525,7 @@ static void quirk_disable_aspm_l0s_l1(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ASMEDIA, 0x1080, quirk_disable_aspm_l0s_l1); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, 0x0451, quirk_disable_aspm_l0s_l1); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PASEMI, 0xa002, quirk_disable_aspm_l0s_l1); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_HUAWEI, 0x1105, quirk_disable_aspm_l0s_l1); /* * Some Pericom PCIe-to-PCI bridges in reverse mode need the PCIe Retrain From 0a4a18e888ae8c8004582f665c5792c84a681668 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 11 Nov 2025 17:09:20 -0800 Subject: [PATCH 1028/1075] drm/client: fix MODULE_PARM_DESC string for "active" The MODULE_PARM_DESC string for the "active" parameter is missing a space and has an extraneous trailing ']' character. Correct these. Before patch: $ modinfo -p ./drm_client_lib.ko active:Choose which drm client to start, default isfbdev] (string) After patch: $ modinfo -p ./drm_client_lib.ko active:Choose which drm client to start, default is fbdev (string) Fixes: f7b42442c4ac ("drm/log: Introduce a new boot logger to draw the kmsg on the screen") Signed-off-by: Randy Dunlap Reviewed-by: Thomas Zimmermann Reviewed-by: Jocelyn Falempe Signed-off-by: Thomas Zimmermann Link: https://patch.msgid.link/20251112010920.2355712-1-rdunlap@infradead.org --- drivers/gpu/drm/clients/drm_client_setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/clients/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c index 72480db1f00d..515aceac22b1 100644 --- a/drivers/gpu/drm/clients/drm_client_setup.c +++ b/drivers/gpu/drm/clients/drm_client_setup.c @@ -13,8 +13,8 @@ static char drm_client_default[16] = CONFIG_DRM_CLIENT_DEFAULT; module_param_string(active, drm_client_default, sizeof(drm_client_default), 0444); MODULE_PARM_DESC(active, - "Choose which drm client to start, default is" - CONFIG_DRM_CLIENT_DEFAULT "]"); + "Choose which drm client to start, default is " + CONFIG_DRM_CLIENT_DEFAULT); /** * drm_client_setup() - Setup in-kernel DRM clients From 96a3a03abf3d8cc38cd9cb0d280235fbcf7c3f7f Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Tue, 11 Nov 2025 17:29:32 +0100 Subject: [PATCH 1029/1075] hsr: Fix supervision frame sending on HSRv0 On HSRv0, no supervision frames were sent. The supervison frames were generated successfully, but failed the check for a sufficiently long mac header, i.e., at least sizeof(struct hsr_ethhdr), in hsr_fill_frame_info() because the mac header only contained the ethernet header. Fix this by including the HSR header in the mac header when generating HSR supervision frames. Note that the mac header now also includes the TLV fields. This matches how we set the headers on rx and also the size of struct hsrv0_ethhdr_sp. Reported-by: Hangbin Liu Closes: https://lore.kernel.org/netdev/aMONxDXkzBZZRfE5@fedora/ Fixes: 9cfb5e7f0ded ("net: hsr: fix hsr_init_sk() vs network/transport headers.") Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/4354114fea9a642fe71f49aeeb6c6159d1d61840.1762876095.git.fmaurer@redhat.com Tested-by: Hangbin Liu Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index fbbc3ccf9df6..1235abb2d79f 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -320,6 +320,9 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); + skb_reset_mac_len(skb); + set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); From b2c26c82f7a94ec4da096f370e3612ee14424450 Mon Sep 17 00:00:00 2001 From: Felix Maurer Date: Tue, 11 Nov 2025 17:29:33 +0100 Subject: [PATCH 1030/1075] hsr: Follow standard for HSRv0 supervision frames For HSRv0, the path_id has the following meaning: - 0000: PRP supervision frame - 0001-1001: HSR ring identifier - 1010-1011: Frames from PRP network (A/B, with RedBoxes) - 1111: HSR supervision frame Follow the IEC 62439-3:2010 standard more closely by setting the right path_id for HSRv0 supervision frames (actually, it is correctly set when the frame is constructed, but hsr_set_path_id() overwrites it) and set a fixed HSR ring identifier of 1. The ring identifier seems to be generally unused and we ignore it anyways on reception, but some fixed identifier is definitely better than using one identifier in one direction and a wrong identifier in the other. This was also the behavior before commit f266a683a480 ("net/hsr: Better frame dispatch") which introduced the alternating path_id. This was later moved to hsr_set_path_id() in commit 451d8123f897 ("net: prp: add packet handling support"). The IEC 62439-3:2010 also contains 6 unused bytes after the MacAddressA in the HSRv0 supervision frames. Adjust a TODO comment accordingly. Fixes: f266a683a480 ("net/hsr: Better frame dispatch") Fixes: 451d8123f897 ("net: prp: add packet handling support") Signed-off-by: Felix Maurer Reviewed-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/ea0d5133cd593856b2fa673d6e2067bf1d4d1794.1762876095.git.fmaurer@redhat.com Tested-by: Hangbin Liu Signed-off-by: Paolo Abeni --- net/hsr/hsr_device.c | 2 +- net/hsr/hsr_forward.c | 22 +++++++++++++++------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 1235abb2d79f..492cbc78ab75 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -337,7 +337,7 @@ static void send_hsr_supervision_frame(struct hsr_port *port, } hsr_stag->tlv.HSR_TLV_type = type; - /* TODO: Why 12 in HSRv0? */ + /* HSRv0 has 6 unused bytes after the MAC */ hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? sizeof(struct hsr_sup_payload) : 12; diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index c67c0d35921d..339f0d220212 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -262,15 +262,23 @@ static struct sk_buff *prp_fill_rct(struct sk_buff *skb, return skb; } -static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, +static void hsr_set_path_id(struct hsr_frame_info *frame, + struct hsr_ethhdr *hsr_ethhdr, struct hsr_port *port) { int path_id; - if (port->type == HSR_PT_SLAVE_A) - path_id = 0; - else - path_id = 1; + if (port->hsr->prot_version) { + if (port->type == HSR_PT_SLAVE_A) + path_id = 0; + else + path_id = 1; + } else { + if (frame->is_supervision) + path_id = 0xf; + else + path_id = 1; + } set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); } @@ -304,7 +312,7 @@ static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, else hsr_ethhdr = (struct hsr_ethhdr *)pc; - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; @@ -330,7 +338,7 @@ struct sk_buff *hsr_create_tagged_frame(struct hsr_frame_info *frame, (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); /* set the lane id properly */ - hsr_set_path_id(hsr_ethhdr, port); + hsr_set_path_id(frame, hsr_ethhdr, port); return skb_clone(frame->skb_hsr, GFP_ATOMIC); } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { return skb_clone(frame->skb_std, GFP_ATOMIC); From ebd4469e7af61019daaf904fdcba07a9ecd18440 Mon Sep 17 00:00:00 2001 From: Andrew Donnellan Date: Wed, 5 Nov 2025 14:40:32 +1100 Subject: [PATCH 1031/1075] entry: Fix ifndef around arch_xfer_to_guest_mode_handle_work() stub The stub implementation of arch_xfer_to_guest_mode_handle_work() is guarded by an #ifndef that incorrectly checks for the name arch_xfer_to_guest_mode_work instead. It seems the function was renamed to add "_handle" as a late change to the original patch, and the #ifndef wasn't updated to go with it. Change the #ifndef to match the name of the function. No users right now, so no need to update any architecture code. Fixes: 935ace2fb5cc4 ("entry: Provide infrastructure for work before transitioning to guest mode") Signed-off-by: Andrew Donnellan Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251105-entry-fix-ifndef-v1-1-d8d28045b627@linux.ibm.com --- include/linux/entry-virt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/entry-virt.h b/include/linux/entry-virt.h index 42c89e3e5ca7..bfa767702d9a 100644 --- a/include/linux/entry-virt.h +++ b/include/linux/entry-virt.h @@ -32,7 +32,7 @@ */ static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work); -#ifndef arch_xfer_to_guest_mode_work +#ifndef arch_xfer_to_guest_mode_handle_work static inline int arch_xfer_to_guest_mode_handle_work(unsigned long ti_work) { return 0; From 0a8fb03fe7b0abab0ff16522e2625163183e7ae4 Mon Sep 17 00:00:00 2001 From: Kiryl Shutsemau Date: Thu, 13 Nov 2025 12:10:06 +0000 Subject: [PATCH 1032/1075] MAINTAINERS: Update name spelling Use transliteration from the Belarusian language instead of Russian. Signed-off-by: Kiryl Shutsemau Signed-off-by: Dave Hansen Link: https://patch.msgid.link/20251113121006.651992-1-kas%40kernel.org --- .mailmap | 2 +- MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 369cfe467932..0f74e16e239c 100644 --- a/.mailmap +++ b/.mailmap @@ -426,7 +426,7 @@ Kenneth W Chen Kenneth Westfield Kiran Gunda Kirill Tkhai -Kirill A. Shutemov +Kiryl Shutsemau Kishon Vijay Abraham I Konrad Dybcio Konrad Dybcio diff --git a/MAINTAINERS b/MAINTAINERS index 46bd8e033042..ffd964b0a9bc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -27845,7 +27845,7 @@ F: arch/x86/kernel/stacktrace.c F: arch/x86/kernel/unwind_*.c X86 TRUST DOMAIN EXTENSIONS (TDX) -M: Kirill A. Shutemov +M: Kiryl Shutsemau R: Dave Hansen R: Rick Edgecombe L: x86@kernel.org From fbade4bd08ba52cbc74a71c4e86e736f059f99f7 Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:50 +0800 Subject: [PATCH 1033/1075] mptcp: Disallow MPTCP subflows from sockmap The sockmap feature allows bpf syscall from userspace, or based on bpf sockops, replacing the sk_prot of sockets during protocol stack processing with sockmap's custom read/write interfaces. ''' tcp_rcv_state_process() subflow_syn_recv_sock() tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) bpf_skops_established <== sockops bpf_sock_map_update(sk) <== call bpf helper tcp_bpf_update_proto() <== update sk_prot ''' Consider two scenarios: 1. When the server has MPTCP enabled and the client also requests MPTCP, the sk passed to the BPF program is a subflow sk. Since subflows only handle partial data, replacing their sk_prot is meaningless and will cause traffic disruption. 2. When the server has MPTCP enabled but the client sends a TCP SYN without MPTCP, subflow_syn_recv_sock() performs a fallback on the subflow, replacing the subflow sk's sk_prot with the native sk_prot. ''' subflow_ulp_fallback() subflow_drop_ctx() mptcp_subflow_ops_undo_override() ''' Subsequently, accept::mptcp_stream_accept::mptcp_fallback_tcp_ops() converts the subflow to plain TCP. For the first case, we should prevent it from being combined with sockmap by setting sk_prot->psock_update_sk_prot to NULL, which will be blocked by sockmap's own flow. For the second case, since subflow_syn_recv_sock() has already restored sk_prot to native tcp_prot/tcpv6_prot, no further action is needed. Fixes: cec37a6e41aa ("mptcp: Handle MP_CAPABLE options for outgoing connections") Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Reviewed-by: Matthieu Baerts (NGI0) Cc: Link: https://patch.msgid.link/20251111060307.194196-2-jiayuan.chen@linux.dev --- net/mptcp/subflow.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e8325890a322..af707ce0f624 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -2144,6 +2144,10 @@ void __init mptcp_subflow_init(void) tcp_prot_override = tcp_prot; tcp_prot_override.release_cb = tcp_release_cb_override; tcp_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcp_prot_override.psock_update_sk_prot = NULL; +#endif #if IS_ENABLED(CONFIG_MPTCP_IPV6) /* In struct mptcp_subflow_request_sock, we assume the TCP request sock @@ -2180,6 +2184,10 @@ void __init mptcp_subflow_init(void) tcpv6_prot_override = tcpv6_prot; tcpv6_prot_override.release_cb = tcp_release_cb_override; tcpv6_prot_override.diag_destroy = tcp_abort_override; +#ifdef CONFIG_BPF_SYSCALL + /* Disable sockmap processing for subflows */ + tcpv6_prot_override.psock_update_sk_prot = NULL; +#endif #endif mptcp_diag_subflow_init(&subflow_ulp_ops); From a257e974210320ede524f340ffe16bf4bf0dda1e Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 13 Nov 2025 19:43:55 +0800 Subject: [PATCH 1034/1075] sched_ext: Fix possible deadlock in the deferred_irq_workfn() For PREEMPT_RT=y kernels, the deferred_irq_workfn() is executed in the per-cpu irq_work/* task context and not disable-irq, if the rq returned by container_of() is current CPU's rq, the following scenarios may occur: lock(&rq->__lock); lock(&rq->__lock); This commit use IRQ_WORK_INIT_HARD() to replace init_irq_work() to initialize rq->scx.deferred_irq_work, make the deferred_irq_workfn() is always invoked in hard-irq context. Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 03d05ea20006..07399210ac2d 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5321,7 +5321,7 @@ void __init init_sched_ext_class(void) BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n)); - init_irq_work(&rq->scx.deferred_irq_work, deferred_irq_workfn); + rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn); init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn); if (cpu_online(cpu)) From cbcff934fa7deb670d9545a3aad4d07e8f1e4f3c Mon Sep 17 00:00:00 2001 From: Harry Yoo Date: Tue, 11 Nov 2025 21:53:31 +0900 Subject: [PATCH 1035/1075] mm/slub: fix memory leak in free_to_pcs_bulk() The commit 989b09b73978 ("slab: skip percpu sheaves for remote object freeing") introduced the remote_objects array in free_to_pcs_bulk() to skip sheaves when objects from a remote node are freed. However, the array is flushed only when: 1) the array becomes full (++remote_nr >= PCS_BATCH_MAX), or 2) slab_free_hook() returns false and size becomes zero. When neither of the conditions is met, objects in the array are leaked. This resulted in a memory leak [1], where 82 GiB of memory was allocated for the maple_node cache. Flush the array after successfully freeing objects to sheaves in the do_free: path. In the meantime, move the snippet if (!size) goto flush_remote; outside the while loop for readability. Let's say all objects in the array are from a remote node: then we acquire s->cpu_sheaves->lock and try to free an object even when size is zero. This doesn't appear to be harmful, but isn't really readable. Reported-by: Tytus Rogalewski Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220765 [1] Closes: https://lore.kernel.org/linux-mm/20251107094809.12e9d705b7bf4815783eb184@linux-foundation.org Closes: https://lore.kernel.org/all/aRGDTwbt2EIz2CYn@hyeyoo Fixes: 989b09b73978 ("slab: skip percpu sheaves for remote object freeing") Signed-off-by: Harry Yoo Link: https://patch.msgid.link/20251111125331.12246-1-harry.yoo@oracle.com Acked-by: Liam R. Howlett Tested-by: Darrick J. Wong Tested-by: Tytus Rogalewski Signed-off-by: Vlastimil Babka --- mm/slub.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index f1a5373eee7b..a787687a0d59 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6332,8 +6332,6 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) if (unlikely(!slab_free_hook(s, p[i], init, false))) { p[i] = p[--size]; - if (!size) - goto flush_remote; continue; } @@ -6348,6 +6346,9 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) i++; } + if (!size) + goto flush_remote; + next_batch: if (!local_trylock(&s->cpu_sheaves->lock)) goto fallback; @@ -6402,6 +6403,9 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) goto next_batch; } + if (remote_nr) + goto flush_remote; + return; no_empty: From 85c894a80ac46aa177df04e0a33bcad409b7d64f Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Fri, 7 Nov 2025 11:31:50 -0600 Subject: [PATCH 1036/1075] perf header: Write bpf_prog (infos|btfs)_cnt to data file With commit f0d0f978f3f5830a ("perf header: Don't write empty BPF/BTF info"), the write_bpf_( prog_info() | btf() ) functions exit without writing anything if env->bpf_prog.(infos| btfs)_cnt is zero. process_bpf_( prog_info() | btf() ), however, still expect a "count" value to exist in the data file. If btf information is empty, for example, process_bpf_btf will read garbage or some other data as the number of btf nodes in the data file. As a result, the data file will not be processed correctly. Instead, write the count to the data file and exit if it is zero. Fixes: f0d0f978f3f5830a ("perf header: Don't write empty BPF/BTF info") Reviewed-by: Ian Rogers Signed-off-by: Thomas Falcon Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4f2a6e10ed5c..4e12be579140 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1022,12 +1022,9 @@ static int write_bpf_prog_info(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.infos_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.infos_cnt == 0) goto out; root = &env->bpf_progs.infos; @@ -1067,13 +1064,10 @@ static int write_bpf_btf(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.btfs_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.btfs_cnt == 0) goto out; root = &env->bpf_progs.btfs; From a09e5967ad6819379fd31894634d7aed29c18409 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 12 Nov 2025 21:57:08 -0300 Subject: [PATCH 1037/1075] perf build: Don't fail fast path feature detection when binutils-devel is not available This is one more remnant of the BUILD_NONDISTRO series to make building with binutils-devel opt-in due to license incompatibility. In this case just the references at link time were still in place, which make building the test-all.bin file fail, which wasn't detected before probably because the last test was done with binutils-devel available, doh. Now: $ rpm -q binutils-devel package binutils-devel is not installed $ file /tmp/build/perf-tools/feature/test-all.bin /tmp/build/perf-tools/feature/test-all.bin: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=4b5388a346b51f1b993f0b0dbd49f4570769b03c, for GNU/Linux 3.2.0, not stripped $ Fixes: 970ae86307718c34 ("perf build: The bfd features are opt-in, stop testing for them by default") Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: James Clark Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 4 ++-- tools/perf/Makefile.config | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 49b0add392b1..95646290cb89 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -107,7 +107,7 @@ all: $(FILES) __BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl - BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd + BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -ldl -lz -llzma -lzstd __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 @@ -115,7 +115,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty + $(BUILD_ALL) $(OUTPUT)test-hello.bin: $(BUILD) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 5700516aa84a..2dd5f5a60568 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -354,9 +354,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt -FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl -FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl - CORE_CFLAGS += -fno-omit-frame-pointer CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra @@ -930,6 +927,8 @@ ifdef BUILD_NONDISTRO ifeq ($(feature-libbfd), 1) EXTLIBS += -lbfd -lopcodes + FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl + FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl else # we are on a system that requires -liberty and (maybe) -lz # to link against -lbfd; test each case individually here From 84003ab3d0ca3717e4b36071c3c5f8b3c70e317c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 13 Nov 2025 15:03:07 -0300 Subject: [PATCH 1038/1075] tools headers UAPI: Sync KVM's vmx.h with the kernel to pick SEAMCALL exit reason To pick the changes in: 9d7dfb95da2cb5c1 ("KVM: VMX: Inject #UD if guest tries to execute SEAMCALL or TDCALL") The 'perf kvm-stat' tool uses the exit reasons that are included in the VMX_EXIT_REASONS define, this new SEAMCALL isn't included there (TDCALL is), so shouldn't be causing any change in behaviour, this patch ends up being just addressess the following perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Please see tools/include/uapi/README for further details. Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index 9792e329343e..1baa86dfe029 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -93,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_SEAMCALL 76 #define EXIT_REASON_TDCALL 77 #define EXIT_REASON_MSR_READ_IMM 84 #define EXIT_REASON_MSR_WRITE_IMM 85 From d0206db94b36c998c11458cfdae2f45ba20bc4fb Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 13 Nov 2025 16:01:23 +0000 Subject: [PATCH 1039/1075] perf lock: Fix segfault due to missing kernel map Kernel maps are encoded in PERF_RECORD_MMAP2 samples but "perf lock report" and "perf lock contention" do not process MMAP2 samples. Because of that, machine->vmlinux_map stays NULL and any later access triggers a segmentation fault. Fix it by adding ->mmap2() callbacks. Fixes: 53b00ff358dc75b1 ("perf record: Make --buildid-mmap the default") Reported-by: Tycho Andersen (AMD) Reviewed-by: Ian Rogers Signed-off-by: Ravi Bangoria Tested-by: Tycho Andersen (AMD) Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-lock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 078634461df2..e8962c985d34 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1867,6 +1867,7 @@ static int __cmd_report(bool display_info) eops.sample = process_sample_event; eops.comm = perf_event__process_comm; eops.mmap = perf_event__process_mmap; + eops.mmap2 = perf_event__process_mmap2; eops.namespaces = perf_event__process_namespaces; eops.tracing_data = perf_event__process_tracing_data; session = perf_session__new(&data, &eops); @@ -2023,6 +2024,7 @@ static int __cmd_contention(int argc, const char **argv) eops.sample = process_sample_event; eops.comm = perf_event__process_comm; eops.mmap = perf_event__process_mmap; + eops.mmap2 = perf_event__process_mmap2; eops.tracing_data = perf_event__process_tracing_data; perf_env__init(&host_env); From 3c723f449723db2dc2b75b7efe03c2a76e4c09f0 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 13 Nov 2025 16:01:24 +0000 Subject: [PATCH 1040/1075] perf test: Fix lock contention test Couple of independent fixes: 1. Wire in SIGSEGV handler that terminates the test with a failure code. 2. Use "--lock-cgroup" instead of "-g"; "-g" was proposed but never merged. See commit 4d1792d0a2564caf ("perf lock contention: Add --lock-cgroup option") 3. Call cleanup() on every normal exit so trap_cleanup() doesn't mistake it for an unexpected signal and emit a false-negative "Unexpected signal in main" message. Before patch: # ./perf test -vv "lock contention" 85: kernel lock contention analysis test: --- start --- test child forked, pid 610711 Testing perf lock record and perf lock contention Testing perf lock contention --use-bpf Testing perf lock record and perf lock contention at the same time Testing perf lock contention --threads Testing perf lock contention --lock-addr Testing perf lock contention --lock-cgroup Unexpected signal in test_aggr_cgroup ---- end(0) ---- 85: kernel lock contention analysis test : Ok After patch: # ./perf test -vv "lock contention" 85: kernel lock contention analysis test: --- start --- test child forked, pid 602637 Testing perf lock record and perf lock contention Testing perf lock contention --use-bpf Testing perf lock record and perf lock contention at the same time Testing perf lock contention --threads Testing perf lock contention --lock-addr Testing perf lock contention --lock-cgroup Testing perf lock contention --type-filter (w/ spinlock) Testing perf lock contention --lock-filter (w/ tasklist_lock) Testing perf lock contention --callstack-filter (w/ unix_stream) [Skip] Could not find 'unix_stream' Testing perf lock contention --callstack-filter with task aggregation [Skip] Could not find 'unix_stream' Testing perf lock contention --cgroup-filter Testing perf lock contention CSV output ---- end(0) ---- 85: kernel lock contention analysis test : Ok Reviewed-by: Ian Rogers Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ananth Narayan Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sandipan Das Cc: Santosh Shukla Cc: Tycho Andersen Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/lock_contention.sh | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 7248a74ca2a3..6dd90519f45c 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -13,15 +13,18 @@ cleanup() { rm -f ${perfdata} rm -f ${result} rm -f ${errout} - trap - EXIT TERM INT + trap - EXIT TERM INT ERR } trap_cleanup() { + if (( $? == 139 )); then #SIGSEGV + err=1 + fi echo "Unexpected signal in ${FUNCNAME[1]}" cleanup exit ${err} } -trap trap_cleanup EXIT TERM INT +trap trap_cleanup EXIT TERM INT ERR check() { if [ "$(id -u)" != 0 ]; then @@ -145,7 +148,7 @@ test_aggr_cgroup() fi # the perf lock contention output goes to the stderr - perf lock con -a -b -g -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result count is not 1:" "$(cat "${result}" | wc -l)" err=1 @@ -271,7 +274,7 @@ test_cgroup_filter() return fi - perf lock con -a -b -g -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -F wait_total -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a cgroup result:" "$(cat "${result}")" err=1 @@ -279,7 +282,7 @@ test_cgroup_filter() fi cgroup=$(cat "${result}" | awk '{ print $3 }') - perf lock con -a -b -g -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result} + perf lock con -a -b --lock-cgroup -E 1 -G "${cgroup}" -q -- perf bench sched messaging -p > /dev/null 2> ${result} if [ "$(cat "${result}" | wc -l)" != "1" ]; then echo "[Fail] BPF result should have a result with cgroup filter:" "$(cat "${cgroup}")" err=1 @@ -338,4 +341,5 @@ test_aggr_task_stack_filter test_cgroup_filter test_csv_output +cleanup exit ${err} From b72b8132d8fd2d6bf5b420a03d4fc553980c3a92 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 11 Nov 2025 23:43:11 -0800 Subject: [PATCH 1041/1075] perf libbfd: Ensure libbfd is initialized prior to use Multiple threads may be creating and destroying BFD objects in situations like `perf top`. Without appropriate initialization crashes may occur during libbfd's cache management. BFD's locks require recursive mutexes, add support for these. Committer testing: This happens only when building with 'make BUILD_NONDISTRO=1' and having the binutils-devel package (or equivalent) installed, i.e. linking with binutils devel files, an opt-in perf build. Before: root@x1:~# perf top perf: Segmentation fault -------- backtrace -------- root@x1:~# After this patch it works as before. Closes: https://lore.kernel.org/lkml/aQt66zhfxSA80xwt@gentoo.org/ Fixes: 95931d9a594dd0b5 ("perf libbfd: Move libbfd functionality to its own file") Reported-by: Guilherme Amadio Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/libbfd.c | 38 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/mutex.c | 14 ++++++++++---- tools/perf/util/mutex.h | 2 ++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c index 01147fbf73b3..6434c2dccd4a 100644 --- a/tools/perf/util/libbfd.c +++ b/tools/perf/util/libbfd.c @@ -38,6 +38,39 @@ struct a2l_data { asymbol **syms; }; +static bool perf_bfd_lock(void *bfd_mutex) +{ + mutex_lock(bfd_mutex); + return true; +} + +static bool perf_bfd_unlock(void *bfd_mutex) +{ + mutex_unlock(bfd_mutex); + return true; +} + +static void perf_bfd_init(void) +{ + static struct mutex bfd_mutex; + + mutex_init_recursive(&bfd_mutex); + + if (bfd_init() != BFD_INIT_MAGIC) { + pr_err("Error initializing libbfd\n"); + return; + } + if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex)) + pr_err("Error initializing libbfd threading\n"); +} + +static void ensure_bfd_init(void) +{ + static pthread_once_t bfd_init_once = PTHREAD_ONCE_INIT; + + pthread_once(&bfd_init_once, perf_bfd_init); +} + static int bfd_error(const char *string) { const char *errmsg; @@ -132,6 +165,7 @@ static struct a2l_data *addr2line_init(const char *path) bfd *abfd; struct a2l_data *a2l = NULL; + ensure_bfd_init(); abfd = bfd_openr(path, NULL); if (abfd == NULL) return NULL; @@ -288,6 +322,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) bfd *abfd; u64 start, len; + ensure_bfd_init(); abfd = bfd_openr(debugfile, NULL); if (!abfd) return -1; @@ -393,6 +428,7 @@ int libbfd__read_build_id(const char *filename, struct build_id *bid, bool block if (fd < 0) return -1; + ensure_bfd_init(); abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); if (!abfd) return -1; @@ -421,6 +457,7 @@ int libbfd_filename__read_debuglink(const char *filename, char *debuglink, asection *section; bfd *abfd; + ensure_bfd_init(); abfd = bfd_openr(filename, NULL); if (!abfd) return -1; @@ -480,6 +517,7 @@ int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused, memset(tpath, 0, sizeof(tpath)); perf_exe(tpath, sizeof(tpath)); + ensure_bfd_init(); bfdf = bfd_openr(tpath, NULL); if (bfdf == NULL) abort(); diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c index bca7f0717f35..7aa1f3f55a7d 100644 --- a/tools/perf/util/mutex.c +++ b/tools/perf/util/mutex.c @@ -17,7 +17,7 @@ static void check_err(const char *fn, int err) #define CHECK_ERR(err) check_err(__func__, err) -static void __mutex_init(struct mutex *mtx, bool pshared) +static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive) { pthread_mutexattr_t attr; @@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared) /* In normal builds enable error checking, such as recursive usage. */ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); #endif + if (recursive) + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)); if (pshared) CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); - CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); CHECK_ERR(pthread_mutexattr_destroy(&attr)); } void mutex_init(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/false); + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false); } void mutex_init_pshared(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/true); + __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false); +} + +void mutex_init_recursive(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true); } void mutex_destroy(struct mutex *mtx) diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 38458f00846f..70232d8d094f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -104,6 +104,8 @@ void mutex_init(struct mutex *mtx); * process-private attribute. */ void mutex_init_pshared(struct mutex *mtx); +/* Initializes a mutex that may be recursively held on the same thread. */ +void mutex_init_recursive(struct mutex *mtx); void mutex_destroy(struct mutex *mtx); void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); From c77b3b79a92e3345aa1ee296180d1af4e7031f8f Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:51 +0800 Subject: [PATCH 1042/1075] mptcp: Fix proto fallback detection with BPF The sockmap feature allows bpf syscall from userspace, or based on bpf sockops, replacing the sk_prot of sockets during protocol stack processing with sockmap's custom read/write interfaces. ''' tcp_rcv_state_process() syn_recv_sock()/subflow_syn_recv_sock() tcp_init_transfer(BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) bpf_skops_established <== sockops bpf_sock_map_update(sk) <== call bpf helper tcp_bpf_update_proto() <== update sk_prot ''' When the server has MPTCP enabled but the client sends a TCP SYN without MPTCP, subflow_syn_recv_sock() performs a fallback on the subflow, replacing the subflow sk's sk_prot with the native sk_prot. ''' subflow_syn_recv_sock() subflow_ulp_fallback() subflow_drop_ctx() mptcp_subflow_ops_undo_override() ''' Then, this subflow can be normally used by sockmap, which replaces the native sk_prot with sockmap's custom sk_prot. The issue occurs when the user executes accept::mptcp_stream_accept::mptcp_fallback_tcp_ops(). Here, it uses sk->sk_prot to compare with the native sk_prot, but this is incorrect when sockmap is used, as we may incorrectly set sk->sk_socket->ops. This fix uses the more generic sk_family for the comparison instead. Additionally, this also prevents a WARNING from occurring: result from ./scripts/decode_stacktrace.sh: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 337 at net/mptcp/protocol.c:68 mptcp_stream_accept \ (net/mptcp/protocol.c:4005) Modules linked in: ... PKRU: 55555554 Call Trace: do_accept (net/socket.c:1989) __sys_accept4 (net/socket.c:2028 net/socket.c:2057) __x64_sys_accept (net/socket.c:2067) x64_sys_call (arch/x86/entry/syscall_64.c:41) do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) RIP: 0033:0x7f87ac92b83d ---[ end trace 0000000000000000 ]--- Fixes: 0b4f33def7bb ("mptcp: fix tcp fallback crash") Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Reviewed-by: Jakub Sitnicki Reviewed-by: Matthieu Baerts (NGI0) Cc: Link: https://patch.msgid.link/20251111060307.194196-3-jiayuan.chen@linux.dev --- net/mptcp/protocol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2d6b8de35c44..90b4aeca2596 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -61,11 +61,13 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk) static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) { + unsigned short family = READ_ONCE(sk->sk_family); + #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (sk->sk_prot == &tcpv6_prot) + if (family == AF_INET6) return &inet6_stream_ops; #endif - WARN_ON_ONCE(sk->sk_prot != &tcp_prot); + WARN_ON_ONCE(family != AF_INET); return &inet_stream_ops; } From cb730e4ac1b4dca09d364fd83464ebd29547a4ef Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:52 +0800 Subject: [PATCH 1043/1075] selftests/bpf: Add mptcp test with sockmap Add test cases to verify that when MPTCP falls back to plain TCP sockets, they can properly work with sockmap. Additionally, add test cases to ensure that sockmap correctly rejects MPTCP sockets as expected. Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251111060307.194196-4-jiayuan.chen@linux.dev --- .../testing/selftests/bpf/prog_tests/mptcp.c | 140 ++++++++++++++++++ .../selftests/bpf/progs/mptcp_sockmap.c | 43 ++++++ 2 files changed, 183 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_sockmap.c diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd2..8fade8bdc451 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,13 @@ #include #include #include +#include #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +438,142 @@ static void test_subflow(void) close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. */ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = accept(listen_fd, NULL, 0); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = accept(listen_fd, NULL, 0); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = send(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv(client_fd2, rcv, sizeof(rcv), 0); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = accept(listen_fd, NULL, 0); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +582,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c new file mode 100644 index 000000000000..d4eef0cbadb9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +int sk_index; +int redirect_idx; +int trace_port; +int helper_ret; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} sock_map SEC(".maps"); + +SEC("sockops") +int mptcp_sockmap_inject(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + + /* only accept specified connection */ + if (skops->local_port != trace_port || + skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + /* update sk handler */ + helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST); + + return 1; +} + +SEC("sk_skb/stream_verdict") +int mptcp_sockmap_redirect(struct __sk_buff *skb) +{ + /* redirect skb to the sk under sock_map[redirect_idx] */ + return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0); +} From e1a97a627cd01d73fac5dd054d8f3de601ef2781 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 13 Nov 2025 16:35:50 -0600 Subject: [PATCH 1044/1075] x86/CPU/AMD: Add additional fixed RDSEED microcode revisions Microcode that resolves the RDSEED failure (SB-7055 [1]) has been released for additional Zen5 models to linux-firmware [2]. Update the zen5_rdseed_microcode array to cover these new models. Fixes: 607b9fb2ce24 ("x86/CPU/AMD: Add RDSEED fix for Zen5") Signed-off-by: Mario Limonciello Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7055.html [1] Link: https://gitlab.com/kernel-firmware/linux-firmware/-/commit/6167e5566900cf236f7a69704e8f4c441bc7212a [2] Link: https://patch.msgid.link/20251113223608.1495655-1-mario.limonciello@amd.com --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2ba9f2d42d8c..5d46709c58d0 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1037,7 +1037,14 @@ static void init_amd_zen4(struct cpuinfo_x86 *c) static const struct x86_cpu_id zen5_rdseed_microcode[] = { ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), + ZEN_MODEL_STEP_UCODE(0x1a, 0x08, 0x1, 0x0b008121), ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), + ZEN_MODEL_STEP_UCODE(0x1a, 0x24, 0x0, 0x0b204037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x0, 0x0b404035), + ZEN_MODEL_STEP_UCODE(0x1a, 0x44, 0x1, 0x0b404108), + ZEN_MODEL_STEP_UCODE(0x1a, 0x60, 0x0, 0x0b600037), + ZEN_MODEL_STEP_UCODE(0x1a, 0x68, 0x0, 0x0b608038), + ZEN_MODEL_STEP_UCODE(0x1a, 0x70, 0x0, 0x0b700037), {}, }; From dd14022a7ce96963aa923e35cf4bcc8c32f95840 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 14 Nov 2025 14:01:14 +0100 Subject: [PATCH 1045/1075] x86/microcode/AMD: Add Zen5 model 0x44, stepping 0x1 minrev Add the minimum Entrysign revision for that model+stepping to the list of minimum revisions. Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Reported-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/e94dd76b-4911-482f-8500-5c848a3df026@citrix.com --- arch/x86/kernel/cpu/microcode/amd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index dc82153009da..a881bf4c2011 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -224,6 +224,7 @@ static bool need_sha_check(u32 cur_rev) case 0xb1010: return cur_rev <= 0xb101046; break; case 0xb2040: return cur_rev <= 0xb204031; break; case 0xb4040: return cur_rev <= 0xb404031; break; + case 0xb4041: return cur_rev <= 0xb404101; break; case 0xb6000: return cur_rev <= 0xb600031; break; case 0xb6080: return cur_rev <= 0xb608031; break; case 0xb7000: return cur_rev <= 0xb700031; break; From 21a9ab5b90b3716a631d559e62818029b4e7f5b7 Mon Sep 17 00:00:00 2001 From: Lushih Hsieh Date: Fri, 14 Nov 2025 13:20:53 +0800 Subject: [PATCH 1046/1075] ALSA: usb-audio: Add native DSD quirks for PureAudio DAC series The PureAudio APA DAC and Lotus DAC5 series are USB Audio 2.0 Class devices that support native Direct Stream Digital (DSD) playback via specific vendor protocols. Without these quirks, the devices may only function in standard PCM mode, or fail to correctly report their DSD format capabilities to the ALSA framework, preventing native DSD playback under Linux. This commit adds new quirk entries for the mentioned DAC models based on their respective Vendor/Product IDs (VID:PID), for example: 0x16d0:0x0ab1 (APA DAC), 0x16d0:0xeca1 (DAC5 series), etc. The quirk ensures correct DSD format handling by setting the required SNDRV_PCM_FMTBIT_DSD_U32_BE format bit and defining the DSD-specific Audio Class 2.0 (AC2.0) endpoint configurations. This allows the ALSA DSD API to correctly address the device for high-bitrate DSD streams, bypassing the need for DoP (DSD over PCM). Test on APA DAC and Lotus DAC5 SE under Arch Linux. Tested-by: Lushih Hsieh Signed-off-by: Lushih Hsieh Link: https://patch.msgid.link/20251114052053.54989-1-bruce@mail.kh.edu.tw Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e5b857129caf..5e30bff69f82 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2022,6 +2022,8 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x16d0, 0x09d8): /* NuPrime IDA-8 */ case USB_ID(0x16d0, 0x09db): /* NuPrime Audio DAC-9 */ case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */ + case USB_ID(0x16d0, 0x0ab1): /* PureAudio APA DAC */ + case USB_ID(0x16d0, 0xeca1): /* PureAudio Lotus DAC5, DAC5 SE, DAC5 Pro */ case USB_ID(0x1db5, 0x0003): /* Bryston BDA3 */ case USB_ID(0x20a0, 0x4143): /* WaveIO USB Audio 2.0 */ case USB_ID(0x22e1, 0xca01): /* HDTA Serenade DSD */ @@ -2299,6 +2301,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_IGNORE_CLOCK_SOURCE), DEVICE_FLG(0x1686, 0x00dd, /* Zoom R16/24 */ QUIRK_FLAG_TX_LENGTH | QUIRK_FLAG_CTL_MSG_DELAY_1M), + DEVICE_FLG(0x16d0, 0x0ab1, /* PureAudio APA DAC */ + QUIRK_FLAG_DSD_RAW), + DEVICE_FLG(0x16d0, 0xeca1, /* PureAudio Lotus DAC5, DAC5 SE and DAC5 Pro */ + QUIRK_FLAG_DSD_RAW), DEVICE_FLG(0x17aa, 0x1046, /* Lenovo ThinkStation P620 Rear Line-in, Line-out and Microphone */ QUIRK_FLAG_DISABLE_AUTOSUSPEND), DEVICE_FLG(0x17aa, 0x104d, /* Lenovo ThinkStation P620 Internal Speaker + Front Headset */ From 37339122a7801660dce11abd817af82cc4bef163 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 14 Nov 2025 23:18:50 +0900 Subject: [PATCH 1047/1075] firewire: core: Initialize topology_map.lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lockdep barfs on the new uninitialized spinlock. Initialize it. protip: enable lockdep (CONFIG_PROVE_LOCKING=y) when doing locking changes firewire_ohci 0000:02:01.1: added OHCI v1.10 device as card 0, 4 IR + 4 IT contexts, quirks 0x11 INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 0 UID: 0 PID: 1042 Comm: irq/17-firewire Not tainted 6.17.0-rc2-cl-bisect2-00026-g7d138cb269db #136 PREEMPT Hardware name: Dell Inc. Latitude E5400 /0D695C, BIOS A19 06/13/2013 Call Trace: dump_stack_lvl+0x6d/0xa0 register_lock_class+0x783/0x790 ? find_held_lock+0x2b/0x80 ? __mod_timer+0x110/0x320 ? __mod_timer+0x110/0x320 __lock_acquire+0x405/0x2600 lock_acquire+0xca/0x2e0 ? fw_core_handle_bus_reset+0x888/0xca0 [firewire_core] ? fw_core_handle_bus_reset+0x878/0xca0 [firewire_core] ? fw_core_handle_bus_reset+0x878/0xca0 [firewire_core] _raw_spin_lock+0x2e/0x40 ? fw_core_handle_bus_reset+0x888/0xca0 [firewire_core] fw_core_handle_bus_reset+0x888/0xca0 [firewire_core] handle_selfid_complete_event+0x35c/0x7a0 [firewire_ohci] ? irq_thread+0x8d/0x280 irq_thread_fn+0x18/0x50 irq_thread+0x15a/0x280 ? irq_check_status_bit+0x100/0x100 ? lockdep_hardirqs_on+0x78/0x100 ? irq_finalize_oneshot.part.0+0xc0/0xc0 ? irq_forced_thread_fn+0x60/0x60 kthread+0x114/0x200 ? kthreads_online_cpu+0x110/0x110 ret_from_fork+0x158/0x1e0 ? kthreads_online_cpu+0x110/0x110 ret_from_fork_asm+0x11/0x20 Reported-by: Erhard Furtner Fixes: 7d138cb269db ("firewire: core: use spin lock specific to topology map") Signed-off-by: Ville Syrjälä Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index e5e0174a0335..66e1106db5e7 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -577,6 +577,8 @@ void fw_card_initialize(struct fw_card *card, INIT_LIST_HEAD(&card->transactions.list); spin_lock_init(&card->transactions.lock); + spin_lock_init(&card->topology_map.lock); + card->split_timeout.hi = DEFAULT_SPLIT_TIMEOUT / 8000; card->split_timeout.lo = (DEFAULT_SPLIT_TIMEOUT % 8000) << 19; card->split_timeout.cycles = DEFAULT_SPLIT_TIMEOUT; From 31475b88110c4725b4f9a79c3a0d9bbf97e69e1c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 13 Nov 2025 13:21:47 +0100 Subject: [PATCH 1048/1075] s390/mm: Fix __ptep_rdp() inline assembly When a zero ASCE is passed to the __ptep_rdp() inline assembly, the generated instruction should have the R3 field of the instruction set to zero. However the inline assembly is written incorrectly: for such cases a zero is loaded into a register allocated by the compiler and this register is then used by the instruction. This means that selected TLB entries may not be flushed since the specified ASCE does not match the one which was used when the selected TLB entries were created. Fix this by removing the asce and opt parameters of __ptep_rdp(), since all callers always pass zero, and use a hard-coded register zero for the R3 field. Fixes: 0807b856521f ("s390/mm: add support for RDP (Reset DAT-Protection)") Cc: stable@vger.kernel.org Reviewed-by: Gerald Schaefer Signed-off-by: Heiko Carstens --- arch/s390/include/asm/pgtable.h | 12 +++++------- arch/s390/mm/pgtable.c | 4 ++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index b7100c6a4054..6663f1619abb 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1154,17 +1154,15 @@ static inline pte_t pte_mkhuge(pte_t pte) #define IPTE_NODAT 0x400 #define IPTE_GUEST_ASCE 0x800 -static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, - unsigned long opt, unsigned long asce, - int local) +static __always_inline void __ptep_rdp(unsigned long addr, pte_t *ptep, int local) { unsigned long pto; pto = __pa(ptep) & ~(PTRS_PER_PTE * sizeof(pte_t) - 1); - asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%[asce],%[m4]" + asm volatile(".insn rrf,0xb98b0000,%[r1],%[r2],%%r0,%[m4]" : "+m" (*ptep) - : [r1] "a" (pto), [r2] "a" ((addr & PAGE_MASK) | opt), - [asce] "a" (asce), [m4] "i" (local)); + : [r1] "a" (pto), [r2] "a" (addr & PAGE_MASK), + [m4] "i" (local)); } static __always_inline void __ptep_ipte(unsigned long address, pte_t *ptep, @@ -1348,7 +1346,7 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, * A local RDP can be used to do the flush. */ if (cpu_has_rdp() && !(pte_val(*ptep) & _PAGE_PROTECT)) - __ptep_rdp(address, ptep, 0, 0, 1); + __ptep_rdp(address, ptep, 1); } #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..05974304d622 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -274,9 +274,9 @@ void ptep_reset_dat_prot(struct mm_struct *mm, unsigned long addr, pte_t *ptep, preempt_disable(); atomic_inc(&mm->context.flush_count); if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_rdp(addr, ptep, 0, 0, 1); + __ptep_rdp(addr, ptep, 1); else - __ptep_rdp(addr, ptep, 0, 0, 0); + __ptep_rdp(addr, ptep, 0); /* * PTE is not invalidated by RDP, only _PAGE_PROTECT is cleared. That * means it is still valid and active, and must not be changed according From 14473a1f88596fd729e892782efc267c0097dd1d Mon Sep 17 00:00:00 2001 From: Nick Hu Date: Fri, 14 Nov 2025 15:28:44 +0800 Subject: [PATCH 1049/1075] irqchip/riscv-intc: Add missing free() callback in riscv_intc_domain_ops The irq_domain_free_irqs() helper requires that the irq_domain_ops->free callback is implemented. Otherwise, the kernel reports the warning message "NULL pointer, cannot free irq" when irq_dispose_mapping() is invoked to release the per-HART local interrupts. Set irq_domain_ops->free to irq_domain_free_irqs_top() to cure that. Fixes: 832f15f42646 ("RISC-V: Treat IPIs as normal Linux IRQs") Signed-off-by: Nick Hu Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251114-rv-intc-fix-v1-1-a3edd1c1a868@sifive.com --- drivers/irqchip/irq-riscv-intc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index e5805885394e..70290b35b317 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -166,7 +166,8 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain, static const struct irq_domain_ops riscv_intc_domain_ops = { .map = riscv_intc_domain_map, .xlate = irq_domain_xlate_onecell, - .alloc = riscv_intc_domain_alloc + .alloc = riscv_intc_domain_alloc, + .free = irq_domain_free_irqs_top, }; static struct fwnode_handle *riscv_intc_hwnode(void) From e0fd4d42e27f761e9cc82801b3f183e658dc749d Mon Sep 17 00:00:00 2001 From: Eslam Khafagy Date: Fri, 14 Nov 2025 14:27:39 +0200 Subject: [PATCH 1050/1075] posix-timers: Plug potential memory leak in do_timer_create() When posix timer creation is set to allocate a given timer ID and the access to the user space value faults, the function terminates without freeing the already allocated posix timer structure. Move the allocation after the user space access to cure that. [ tglx: Massaged change log ] Fixes: ec2d0c04624b3 ("posix-timers: Provide a mechanism to allocate a given timer ID") Reported-by: syzbot+9c47ad18f978d4394986@syzkaller.appspotmail.com Suggested-by: Cyrill Gorcunov Signed-off-by: Eslam Khafagy Signed-off-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Link: https://patch.msgid.link/20251114122739.994326-1-eslam.medhat1993@gmail.com Closes: https://lore.kernel.org/all/69155df4.a70a0220.3124cb.0017.GAE@google.com/T/ --- kernel/time/posix-timers.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index aa3120104a51..56e17b625c72 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -475,12 +475,6 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, if (!kc->timer_create) return -EOPNOTSUPP; - new_timer = alloc_posix_timer(); - if (unlikely(!new_timer)) - return -EAGAIN; - - spin_lock_init(&new_timer->it_lock); - /* Special case for CRIU to restore timers with a given timer ID. */ if (unlikely(current->signal->timer_create_restore_ids)) { if (copy_from_user(&req_id, created_timer_id, sizeof(req_id))) @@ -490,6 +484,12 @@ static int do_timer_create(clockid_t which_clock, struct sigevent *event, return -EINVAL; } + new_timer = alloc_posix_timer(); + if (unlikely(!new_timer)) + return -EAGAIN; + + spin_lock_init(&new_timer->it_lock); + /* * Add the timer to the hash table. The timer is not yet valid * after insertion, but has a unique ID allocated. From 4ef92743625818932b9c320152b58274c05e5053 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2025 12:55:16 +0000 Subject: [PATCH 1051/1075] bpf: Add bpf_prog_run_data_pointers() syzbot found that cls_bpf_classify() is able to change tc_skb_cb(skb)->drop_reason triggering a warning in sk_skb_reason_drop(). WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 __sk_skb_reason_drop net/core/skbuff.c:1189 [inline] WARNING: CPU: 0 PID: 5965 at net/core/skbuff.c:1192 sk_skb_reason_drop+0x76/0x170 net/core/skbuff.c:1214 struct tc_skb_cb has been added in commit ec624fe740b4 ("net/sched: Extend qdisc control block with tc control block"), which added a wrong interaction with db58ba459202 ("bpf: wire in data and data_end for cls_act_bpf"). drop_reason was added later. Add bpf_prog_run_data_pointers() helper to save/restore the net_sched storage colliding with BPF data_meta/data_end. Fixes: ec624fe740b4 ("net/sched: Extend qdisc control block with tc control block") Reported-by: syzbot Closes: https://lore.kernel.org/netdev/6913437c.a70a0220.22f260.013b.GAE@google.com/ Signed-off-by: Eric Dumazet Signed-off-by: Martin KaFai Lau Reviewed-by: Victor Nogueira Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20251112125516.1563021-1-edumazet@google.com --- include/linux/filter.h | 20 ++++++++++++++++++++ net/sched/act_bpf.c | 6 ++---- net/sched/cls_bpf.c | 6 ++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index f5c859b8131a..973233b82dc1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -901,6 +901,26 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) cb->data_end = skb->data + skb_headlen(skb); } +static inline int bpf_prog_run_data_pointers( + const struct bpf_prog *prog, + struct sk_buff *skb) +{ + struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; + void *save_data_meta, *save_data_end; + int res; + + save_data_meta = cb->data_meta; + save_data_end = cb->data_end; + + bpf_compute_data_pointers(skb); + res = bpf_prog_run(prog, skb); + + cb->data_meta = save_data_meta; + cb->data_end = save_data_end; + + return res; +} + /* Similar to bpf_compute_data_pointers(), except that save orginal * data in cb->data and cb->meta_data for restore. */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 396b576390d0..c2b5bc19e091 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -47,12 +47,10 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(filter, skb); + filter_res = bpf_prog_run_data_pointers(filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 7fbe42f0e5c2..a32754a2658b 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -97,12 +97,10 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_pointers(skb); - filter_res = bpf_prog_run(prog->filter, skb); + filter_res = bpf_prog_run_data_pointers(prog->filter, skb); } if (unlikely(!skb->tstamp && skb->tstamp_type)) skb->tstamp_type = SKB_CLOCK_REALTIME; From b0c8e6d3d866b6a7f73877f71968dbffd27b7785 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 13 Nov 2025 18:57:29 -0800 Subject: [PATCH 1052/1075] bpf: account for current allocated stack depth in widen_imprecise_scalars() The usage pattern for widen_imprecise_scalars() looks as follows: prev_st = find_prev_entry(env, ...); queued_st = push_stack(...); widen_imprecise_scalars(env, prev_st, queued_st); Where prev_st is an ancestor of the queued_st in the explored states tree. This ancestor is not guaranteed to have same allocated stack depth as queued_st. E.g. in the following case: def main(): for i in 1..2: foo(i) // same callsite, differnt param def foo(i): if i == 1: use 128 bytes of stack iterator based loop Here, for a second 'foo' call prev_st->allocated_stack is 128, while queued_st->allocated_stack is much smaller. widen_imprecise_scalars() needs to take this into account and avoid accessing bpf_verifier_state->frame[*]->stack out of bounds. Fixes: 2793a8b015f7 ("bpf: exact states comparison for iterator convergence checks") Reported-by: Emil Tsalapatis Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251114025730.772723-1-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8314518c8d93..fbe4bb91c564 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8866,7 +8866,7 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env, struct bpf_verifier_state *cur) { struct bpf_func_state *fold, *fcur; - int i, fr; + int i, fr, num_slots; reset_idmap_scratch(env); for (fr = old->curframe; fr >= 0; fr--) { @@ -8879,7 +8879,9 @@ static int widen_imprecise_scalars(struct bpf_verifier_env *env, &fcur->regs[i], &env->idmap_scratch); - for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) { + num_slots = min(fold->allocated_stack / BPF_REG_SIZE, + fcur->allocated_stack / BPF_REG_SIZE); + for (i = 0; i < num_slots; i++) { if (!is_spilled_reg(&fold->stack[i]) || !is_spilled_reg(&fcur->stack[i])) continue; From 6c762611fed7365790000925f3d14f20037d0061 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 13 Nov 2025 18:57:30 -0800 Subject: [PATCH 1053/1075] selftests/bpf: Test widen_imprecise_scalars() with different stack depth A test case for a situation when widen_imprecise_scalars() is called with old->allocated_stack > cur->allocated_stack. Test structure: def widening_stack_size_bug(): r1 = 0 for r6 in 0..1: iterator_with_diff_stack_depth(r1) r1 = 42 def iterator_with_diff_stack_depth(r1): if r1 != 42: use 128 bytes of stack iterator based loop iterator_with_diff_stack_depth() is verified with r1 == 0 first and r1 == 42 next. Causing stack usage of 128 bytes on a first visit and 8 bytes on a second. Such arrangement triggered a KASAN error in widen_imprecise_scalars(). Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251114025730.772723-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/progs/iters_looping.c | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c index 05fa5ce7fc59..d00fd570255a 100644 --- a/tools/testing/selftests/bpf/progs/iters_looping.c +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -161,3 +161,56 @@ int simplest_loop(void *ctx) return 0; } + +__used +static void iterator_with_diff_stack_depth(int x) +{ + struct bpf_iter_num iter; + + asm volatile ( + "if r1 == 42 goto 0f;" + "*(u64 *)(r10 - 128) = 0;" + "0:" + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "goto 1b;" + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); +} + +SEC("socket") +__success +__naked int widening_stack_size_bug(void *ctx) +{ + /* + * Depending on iterator_with_diff_stack_depth() parameter value, + * subprogram stack depth is either 8 or 128 bytes. Arrange values so + * that it is 128 on a first call and 8 on a second. This triggered a + * bug in verifier's widen_imprecise_scalars() logic. + */ + asm volatile ( + "r6 = 0;" + "r1 = 0;" + "1:" + "call iterator_with_diff_stack_depth;" + "r1 = 42;" + "r6 += 1;" + "if r6 < 2 goto 1b;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} From 39231e8d6ba7f794b566fd91ebd88c0834a23b98 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Red Hat)" Date: Fri, 14 Nov 2025 22:49:20 +0100 Subject: [PATCH 1054/1075] mm: fix MAX_FOLIO_ORDER on powerpc configs with hugetlb In the past, CONFIG_ARCH_HAS_GIGANTIC_PAGE indicated that we support runtime allocation of gigantic hugetlb folios. In the meantime it evolved into a generic way for the architecture to state that it supports gigantic hugetlb folios. In commit fae7d834c43c ("mm: add __dump_folio()") we started using CONFIG_ARCH_HAS_GIGANTIC_PAGE to decide MAX_FOLIO_ORDER: whether we could have folios larger than what the buddy can handle. In the context of that commit, we started using MAX_FOLIO_ORDER to detect page corruptions when dumping tail pages of folios. Before that commit, we assumed that we cannot have folios larger than the highest buddy order, which was obviously wrong. In commit 7b4f21f5e038 ("mm/hugetlb: check for unreasonable folio sizes when registering hstate"), we used MAX_FOLIO_ORDER to detect inconsistencies, and in fact, we found some now. Powerpc allows for configs that can allocate gigantic folio during boot (not at runtime), that do not set CONFIG_ARCH_HAS_GIGANTIC_PAGE and can exceed PUD_ORDER. To fix it, let's make powerpc select CONFIG_ARCH_HAS_GIGANTIC_PAGE with hugetlb on powerpc, and increase the maximum folio size with hugetlb to 16 GiB on 64bit (possible on arm64 and powerpc) and 1 GiB on 32 bit (powerpc). Note that on some powerpc configurations, whether we actually have gigantic pages depends on the setting of CONFIG_ARCH_FORCE_MAX_ORDER, but there is nothing really problematic about setting it unconditionally: we just try to keep the value small so we can better detect problems in __dump_folio() and inconsistencies around the expected largest folio in the system. Ideally, we'd have a better way to obtain the maximum hugetlb folio size and detect ourselves whether we really end up with gigantic folios. Let's defer bigger changes and fix the warnings first. While at it, handle gigantic DAX folios more clearly: DAX can only end up creating gigantic folios with HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD. Add a new Kconfig option HAVE_GIGANTIC_FOLIOS to make both cases clearer. In particular, worry about ARCH_HAS_GIGANTIC_PAGE only with HUGETLB_PAGE. Note: with enabling CONFIG_ARCH_HAS_GIGANTIC_PAGE on powerpc, we will now also allow for runtime allocations of folios in some more powerpc configs. I don't think this is a problem, but if it is we could handle it through __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED. While __dump_page()/__dump_folio was also problematic (not handling dumping of tail pages of such gigantic folios correctly), it doesn't seem critical enough to mark it as a fix. Link: https://lkml.kernel.org/r/20251114214920.2550676-1-david@kernel.org Fixes: 7b4f21f5e038 ("mm/hugetlb: check for unreasonable folio sizes when registering hstate") Reported-by: Christophe Leroy Closes: https://lore.kernel.org/r/3e043453-3f27-48ad-b987-cc39f523060a@csgroup.eu/ Reported-by: Sourabh Jain Closes: https://lore.kernel.org/r/94377f5c-d4f0-4c0f-b0f6-5bf1cd7305b1@linux.ibm.com/ Signed-off-by: David Hildenbrand (Red Hat) Cc: Ritesh Harjani (IBM) Cc: Madhavan Srinivasan Cc: Donet Tom Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: Lorenzo Stoakes Cc: "Liam R. Howlett" Cc: Vlastimil Babka Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Michal Hocko Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- arch/powerpc/Kconfig | 1 + arch/powerpc/platforms/Kconfig.cputype | 1 - include/linux/mm.h | 13 ++++++++++--- mm/Kconfig | 7 +++++++ 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e24f4d88885a..9537a61ebae0 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -137,6 +137,7 @@ config PPC select ARCH_HAS_DMA_OPS if PPC64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_GIGANTIC_PAGE if ARCH_SUPPORTS_HUGETLBFS select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 7b527d18aa5e..4c321a8ea896 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -423,7 +423,6 @@ config PPC_64S_HASH_MMU config PPC_RADIX_MMU bool "Radix MMU Support" depends on PPC_BOOK3S_64 - select ARCH_HAS_GIGANTIC_PAGE default y help Enable support for the Power ISA 3.0 Radix style MMU. Currently this diff --git a/include/linux/mm.h b/include/linux/mm.h index d16b33bacc32..7c79b3369b82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2074,7 +2074,7 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) return folio_large_nr_pages(folio); } -#if !defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) +#if !defined(CONFIG_HAVE_GIGANTIC_FOLIOS) /* * We don't expect any folios that exceed buddy sizes (and consequently * memory sections). @@ -2087,10 +2087,17 @@ static inline unsigned long folio_nr_pages(const struct folio *folio) * pages are guaranteed to be contiguous. */ #define MAX_FOLIO_ORDER PFN_SECTION_SHIFT -#else +#elif defined(CONFIG_HUGETLB_PAGE) /* * There is no real limit on the folio size. We limit them to the maximum we - * currently expect (e.g., hugetlb, dax). + * currently expect (see CONFIG_HAVE_GIGANTIC_FOLIOS): with hugetlb, we expect + * no folios larger than 16 GiB on 64bit and 1 GiB on 32bit. + */ +#define MAX_FOLIO_ORDER get_order(IS_ENABLED(CONFIG_64BIT) ? SZ_16G : SZ_1G) +#else +/* + * Without hugetlb, gigantic folios that are bigger than a single PUD are + * currently impossible. */ #define MAX_FOLIO_ORDER PUD_ORDER #endif diff --git a/mm/Kconfig b/mm/Kconfig index 0e26f4fc8717..ca3f146bc705 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -908,6 +908,13 @@ config PAGE_MAPCOUNT config PGTABLE_HAS_HUGE_LEAVES def_bool TRANSPARENT_HUGEPAGE || HUGETLB_PAGE +# +# We can end up creating gigantic folio. +# +config HAVE_GIGANTIC_FOLIOS + def_bool (HUGETLB_PAGE && ARCH_HAS_GIGANTIC_PAGE) || \ + (ZONE_DEVICE && HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) + # TODO: Allow to be enabled without THP config ARCH_SUPPORTS_HUGE_PFNMAP def_bool n From 00fbff75c5acb4755f06f08bd1071879c63940c5 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Sun, 2 Nov 2025 01:07:41 +0530 Subject: [PATCH 1055/1075] crash: fix crashkernel resource shrink When crashkernel is configured with a high reservation, shrinking its value below the low crashkernel reservation causes two issues: 1. Invalid crashkernel resource objects 2. Kernel crash if crashkernel shrinking is done twice For example, with crashkernel=200M,high, the kernel reserves 200MB of high memory and some default low memory (say 256MB). The reservation appears as: cat /proc/iomem | grep -i crash af000000-beffffff : Crash kernel 433000000-43f7fffff : Crash kernel If crashkernel is then shrunk to 50MB (echo 52428800 > /sys/kernel/kexec_crash_size), /proc/iomem still shows 256MB reserved: af000000-beffffff : Crash kernel Instead, it should show 50MB: af000000-b21fffff : Crash kernel Further shrinking crashkernel to 40MB causes a kernel crash with the following trace (x86): BUG: kernel NULL pointer dereference, address: 0000000000000038 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI Call Trace: ? __die_body.cold+0x19/0x27 ? page_fault_oops+0x15a/0x2f0 ? search_module_extables+0x19/0x60 ? search_bpf_extables+0x5f/0x80 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? __release_resource+0xd/0xb0 release_resource+0x26/0x40 __crash_shrink_memory+0xe5/0x110 crash_shrink_memory+0x12a/0x190 kexec_crash_size_store+0x41/0x80 kernfs_fop_write_iter+0x141/0x1f0 vfs_write+0x294/0x460 ksys_write+0x6d/0xf0 This happens because __crash_shrink_memory()/kernel/crash_core.c incorrectly updates the crashk_res resource object even when crashk_low_res should be updated. Fix this by ensuring the correct crashkernel resource object is updated when shrinking crashkernel memory. Link: https://lkml.kernel.org/r/20251101193741.289252-1-sourabhjain@linux.ibm.com Fixes: 16c6006af4d4 ("kexec: enable kexec_crash_size to support two crash kernel regions") Signed-off-by: Sourabh Jain Acked-by: Baoquan He Cc: Zhen Lei Cc: Signed-off-by: Andrew Morton --- kernel/crash_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/crash_core.c b/kernel/crash_core.c index 3b1c43382eec..99dac1aa972a 100644 --- a/kernel/crash_core.c +++ b/kernel/crash_core.c @@ -373,7 +373,7 @@ static int __crash_shrink_memory(struct resource *old_res, old_res->start = 0; old_res->end = 0; } else { - crashk_res.end = ram_res->start - 1; + old_res->end = ram_res->start - 1; } crash_free_reserved_phys_range(ram_res->start, ram_res->end); From 3470715e5c22578c6ea4098b256d5a904e12eef2 Mon Sep 17 00:00:00 2001 From: "David Hildenbrand (Red Hat)" Date: Mon, 3 Nov 2025 11:36:59 +0100 Subject: [PATCH 1056/1075] MAINTAINERS: update David Hildenbrand's email address Switch to kernel.org email address as I will be leaving Red Hat. The old address will remain active until end of January 2026, so performing the change now should make sure that most mails will reach me. Link: https://lkml.kernel.org/r/20251103103659.379335-1-david@kernel.org Signed-off-by: David Hildenbrand Signed-off-by: David Hildenbrand (Red Hat) Signed-off-by: Andrew Morton --- .mailmap | 1 + MAINTAINERS | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/.mailmap b/.mailmap index 369cfe467932..a14166f834a4 100644 --- a/.mailmap +++ b/.mailmap @@ -206,6 +206,7 @@ Danilo Krummrich David Brownell David Collins David Heidelberg +David Hildenbrand David Rheinsberg David Rheinsberg David Rheinsberg diff --git a/MAINTAINERS b/MAINTAINERS index 5b93346f464f..c39701eec3fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11526,7 +11526,7 @@ F: include/linux/platform_data/huawei-gaokun-ec.h HUGETLB SUBSYSTEM M: Muchun Song M: Oscar Salvador -R: David Hildenbrand +R: David Hildenbrand L: linux-mm@kvack.org S: Maintained F: Documentation/ABI/testing/sysfs-kernel-mm-hugepages @@ -13733,7 +13733,7 @@ KERNEL VIRTUAL MACHINE for s390 (KVM/s390) M: Christian Borntraeger M: Janosch Frank M: Claudio Imbrenda -R: David Hildenbrand +R: David Hildenbrand L: kvm@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git @@ -16220,7 +16220,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/linux.git F: drivers/devfreq/tegra30-devfreq.c MEMORY HOT(UN)PLUG -M: David Hildenbrand +M: David Hildenbrand M: Oscar Salvador L: linux-mm@kvack.org S: Maintained @@ -16245,7 +16245,7 @@ F: tools/mm/ MEMORY MANAGEMENT - CORE M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand R: Lorenzo Stoakes R: Liam R. Howlett R: Vlastimil Babka @@ -16301,7 +16301,7 @@ F: mm/execmem.c MEMORY MANAGEMENT - GUP (GET USER PAGES) M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand R: Jason Gunthorpe R: John Hubbard R: Peter Xu @@ -16317,7 +16317,7 @@ F: tools/testing/selftests/mm/gup_test.c MEMORY MANAGEMENT - KSM (Kernel Samepage Merging) M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand R: Xu Xin R: Chengming Zhou L: linux-mm@kvack.org @@ -16333,7 +16333,7 @@ F: mm/mm_slot.h MEMORY MANAGEMENT - MEMORY POLICY AND MIGRATION M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand R: Zi Yan R: Matthew Brost R: Joshua Hahn @@ -16373,7 +16373,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - MISC M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand R: Lorenzo Stoakes R: Liam R. Howlett R: Vlastimil Babka @@ -16461,7 +16461,7 @@ F: mm/shuffle.h MEMORY MANAGEMENT - RECLAIM M: Andrew Morton M: Johannes Weiner -R: David Hildenbrand +R: David Hildenbrand R: Michal Hocko R: Qi Zheng R: Shakeel Butt @@ -16474,7 +16474,7 @@ F: mm/workingset.c MEMORY MANAGEMENT - RMAP (REVERSE MAPPING) M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand M: Lorenzo Stoakes R: Rik van Riel R: Liam R. Howlett @@ -16519,7 +16519,7 @@ F: mm/swapfile.c MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE) M: Andrew Morton -M: David Hildenbrand +M: David Hildenbrand M: Lorenzo Stoakes R: Zi Yan R: Baolin Wang @@ -16621,7 +16621,7 @@ MEMORY MAPPING - MADVISE (MEMORY ADVICE) M: Andrew Morton M: Liam R. Howlett M: Lorenzo Stoakes -M: David Hildenbrand +M: David Hildenbrand R: Vlastimil Babka R: Jann Horn L: linux-mm@kvack.org @@ -27088,7 +27088,7 @@ F: net/vmw_vsock/virtio_transport_common.c VIRTIO BALLOON M: "Michael S. Tsirkin" -M: David Hildenbrand +M: David Hildenbrand L: virtualization@lists.linux.dev S: Maintained F: drivers/virtio/virtio_balloon.c @@ -27243,7 +27243,7 @@ F: drivers/iommu/virtio-iommu.c F: include/uapi/linux/virtio_iommu.h VIRTIO MEM DRIVER -M: David Hildenbrand +M: David Hildenbrand L: virtualization@lists.linux.dev S: Maintained W: https://virtio-mem.gitlab.io/ From f1d47cafe513b5552a5b20a7af0936d9070a8a78 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 5 Nov 2025 11:29:10 -0500 Subject: [PATCH 1057/1075] mm/huge_memory: fix folio split check for anon folios in swapcache Both uniform and non uniform split check missed the check to prevent splitting anon folios in swapcache to non-zero order. Splitting anon folios in swapcache to non-zero order can cause data corruption since swapcache only support PMD order and order-0 entries. This can happen when one use split_huge_pages under debugfs to split anon folios in swapcache. In-tree callers do not perform such an illegal operation. Only debugfs interface could trigger it. I will put adding a test case on my TODO list. Fix the check. Link: https://lkml.kernel.org/r/20251105162910.752266-1-ziy@nvidia.com Fixes: 58729c04cf10 ("mm/huge_memory: add buddy allocator like (non-uniform) folio_split()") Signed-off-by: Zi Yan Reported-by: "David Hildenbrand (Red Hat)" Closes: https://lore.kernel.org/all/dc0ecc2c-4089-484f-917f-920fdca4c898@kernel.org/ Acked-by: David Hildenbrand (Red Hat) Cc: Baolin Wang Cc: Barry Song Cc: Dev Jain Cc: Lance Yang Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Nico Pache Cc: Ryan Roberts Cc: Wei Yang Cc: Signed-off-by: Andrew Morton --- mm/huge_memory.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 323654fb4f8c..2f2a521e5d68 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3522,7 +3522,8 @@ bool non_uniform_split_supported(struct folio *folio, unsigned int new_order, /* order-1 is not supported for anonymous THP. */ VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { /* @@ -3553,7 +3554,8 @@ bool uniform_split_supported(struct folio *folio, unsigned int new_order, if (folio_test_anon(folio)) { VM_WARN_ONCE(warns && new_order == 1, "Cannot split to order-1 folio"); - return new_order != 1; + if (new_order == 1) + return false; } else if (new_order) { if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !mapping_large_folio_support(folio->mapping)) { From a26ec8f3d4e56d4a7ffa301e8032dca9df0bbc05 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Thu, 6 Nov 2025 17:06:35 -0500 Subject: [PATCH 1058/1075] lib/test_kho: check if KHO is enabled We must check whether KHO is enabled prior to issuing KHO commands, otherwise KHO internal data structures are not initialized. Link: https://lkml.kernel.org/r/20251106220635.2608494-1-pasha.tatashin@soleen.com Fixes: b753522bed0b ("kho: add test for kexec handover") Signed-off-by: Pasha Tatashin Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202511061629.e242724-lkp@intel.com Reviewed-by: Pratyush Yadav Reviewed-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Signed-off-by: Andrew Morton --- lib/test_kho.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/test_kho.c b/lib/test_kho.c index 60cd899ea745..fff018e5548d 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -301,6 +301,9 @@ static int __init kho_test_init(void) phys_addr_t fdt_phys; int err; + if (!kho_is_enabled()) + return 0; + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); if (!err) return kho_test_restore(fdt_phys); From 216158f063fe24fb003bd7da0cd92cd6e2c4d48b Mon Sep 17 00:00:00 2001 From: Ankit Khushwaha Date: Thu, 6 Nov 2025 15:25:32 +0530 Subject: [PATCH 1059/1075] selftests/user_events: fix type cast for write_index packed member in perf_test Accessing 'reg.write_index' directly triggers a -Waddress-of-packed-member warning due to potential unaligned pointer access: perf_test.c:239:38: warning: taking address of packed member 'write_index' of class or structure 'user_reg' may result in an unaligned pointer value [-Waddress-of-packed-member] 239 | ASSERT_NE(-1, write(self->data_fd, ®.write_index, | ^~~~~~~~~~~~~~~ Since write(2) works with any alignment. Casting '®.write_index' explicitly to 'void *' to suppress this warning. Link: https://lkml.kernel.org/r/20251106095532.15185-1-ankitkhushwaha.linux@gmail.com Fixes: 42187bdc3ca4 ("selftests/user_events: Add perf self-test for empty arguments events") Signed-off-by: Ankit Khushwaha Cc: Beau Belgrave Cc: "Masami Hiramatsu (Google)" Cc: Steven Rostedt Cc: sunliming Cc: Wei Yang Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/user_events/perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 5288e768b207..68625362add2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) { ASSERT_EQ(1 << reg.enable_bit, self->check); /* Ensure write shows up at correct offset */ - ASSERT_NE(-1, write(self->data_fd, ®.write_index, + ASSERT_NE(-1, write(self->data_fd, (void *)®.write_index, sizeof(reg.write_index))); val = (void *)(((char *)perf_page) + perf_page->data_offset); ASSERT_EQ(PERF_RECORD_SAMPLE, *val); From 1c2a936edd71e133f2806e68324ec81a4eb07588 Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Tue, 11 Nov 2025 21:36:08 +0800 Subject: [PATCH 1060/1075] mm, swap: fix potential UAF issue for VMA readahead Since commit 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning"), the common helper for allocating and preparing a folio in the swap cache layer no longer tries to get a swap device reference internally, because all callers of __read_swap_cache_async are already holding a swap entry reference. The repeated swap device pinning isn't needed on the same swap device. Caller of VMA readahead is also holding a reference to the target entry's swap device, but VMA readahead walks the page table, so it might encounter swap entries from other devices, and call __read_swap_cache_async on another device without holding a reference to it. So it is possible to cause a UAF when swapoff of device A raced with swapin on device B, and VMA readahead tries to read swap entries from device A. It's not easy to trigger, but in theory, it could cause real issues. Make VMA readahead try to get the device reference first if the swap device is a different one from the target entry. Link: https://lkml.kernel.org/r/20251111-swap-fix-vma-uaf-v1-1-41c660e58562@tencent.com Fixes: 78524b05f1a3 ("mm, swap: avoid redundant swap device pinning") Suggested-by: Huang Ying Signed-off-by: Kairui Song Acked-by: Chris Li Cc: Baoquan He Cc: Barry Song Cc: Kemeng Shi Cc: Nhat Pham Cc: Signed-off-by: Andrew Morton --- mm/swap_state.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/mm/swap_state.c b/mm/swap_state.c index b13e9c4baa90..f4980dde5394 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -748,6 +748,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += PAGE_SIZE) { + struct swap_info_struct *si = NULL; + if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) @@ -761,8 +763,19 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, continue; pte_unmap(pte); pte = NULL; + /* + * Readahead entry may come from a device that we are not + * holding a reference to, try to grab a reference, or skip. + */ + if (swp_type(entry) != swp_type(targ_entry)) { + si = get_swap_device(entry); + if (!si) + continue; + } folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx, &page_allocated, false); + if (si) + put_swap_device(si); if (!folio) continue; if (page_allocated) { From 1107aac1ad7f445a83604b14af7be47f1a795c66 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 14 Nov 2025 23:44:21 +0900 Subject: [PATCH 1061/1075] firewire: core: fix to update generation field in topology map The generation field of topology map is updated after initialized by zero. The updated value of generation field is always zero, and is against specification. This commit fixes the bug. Fixes: 7d138cb269db ("firewire: core: use spin lock specific to topology map") Link: https://lore.kernel.org/r/20251114144421.415278-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-topology.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 2f73bcd5696f..ed3ae8cdb0cd 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -441,12 +441,13 @@ static void update_topology_map(__be32 *buffer, size_t buffer_size, int root_nod const u32 *self_ids, int self_id_count) { __be32 *map = buffer; + u32 next_generation = be32_to_cpu(buffer[1]) + 1; int node_count = (root_node_id & 0x3f) + 1; memset(map, 0, buffer_size); *map++ = cpu_to_be32((self_id_count + 2) << 16); - *map++ = cpu_to_be32(be32_to_cpu(buffer[1]) + 1); + *map++ = cpu_to_be32(next_generation); *map++ = cpu_to_be32((node_count << 16) | self_id_count); while (self_id_count--) From 6a23ae0a96a600d1d12557add110e0bb6e32730c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Nov 2025 14:25:38 -0800 Subject: [PATCH 1062/1075] Linux 6.18-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index fb4389aa5d5f..d763c2c75cdb 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* From 36c6f3c03d104faf1aa90922f2310549c175420f Mon Sep 17 00:00:00 2001 From: Zqiang Date: Mon, 17 Nov 2025 20:53:10 +0800 Subject: [PATCH 1063/1075] sched_ext: Use IRQ_WORK_INIT_HARD() to initialize rq->scx.kick_cpus_irq_work For PREEMPT_RT kernels, the kick_cpus_irq_workfn() be invoked in the per-cpu irq_work/* task context and there is no rcu-read critical section to protect. this commit therefore use IRQ_WORK_INIT_HARD() to initialize the per-cpu rq->scx.kick_cpus_irq_work in the init_sched_ext_class(). Signed-off-by: Zqiang Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 07399210ac2d..7aae1d0ce37e 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5322,7 +5322,7 @@ void __init init_sched_ext_class(void) BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n)); BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n)); rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn); - init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn); + rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn); if (cpu_online(cpu)) cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE; From 5bebe8de19264946d398ead4e6c20c229454a552 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 18 Nov 2025 08:21:27 -0800 Subject: [PATCH 1064/1075] mm/huge_memory: Fix initialization of huge zero folio The recent fix to properly initialize the tags of the huge zero folio had an unfortunate not-so-subtle side effect: it caused the actual *contents* of the huge zero folio to not be initialized at all when the hardware didn't support the memory tagging. The reason was the unfortunate semantics of tag_clear_highpage(): on hardware that didn't do the tagging, it would silently just not do anything at all. And since this is done only on arm64 with MTE support, that basically meant most hardware. It wasn't necessarily immediately obvious since the huge zero page isn't necessarily very heavily used - or because it might already be zero because all-zeroes is the most common pattern. But it ends up causing random odd user space failures when you do hit it. The unfortunate semantics have been around for a while, but became a real bug only when we started actively using __GFP_ZEROTAGS in the generic get_huge_zero_folio() function - before that, it had only ever been used in code that checked that the hardware supported it. Fix this by simply changing the semantics of tag_clear_highpage() to return whether it actually successfully did something or not. While at it, also make it initialize multiple pages in one go, since that's actually what the only caller wants it to do and it simplifies the whole logic. Fixes: adfb6609c680 ("mm/huge_memory: initialise the tags of the huge zero folio") Link: https://lore.kernel.org/all/20251117082023.90176-1-00107082@163.com/ Reviewed-by: David Hildenbrand (Red Hat) Reported-and-tested-by: David Wang <00107082@163.com> Reported-and-tested-by: Carlos Llamas Signed-off-by: Linus Torvalds --- arch/arm64/include/asm/page.h | 4 ++-- arch/arm64/mm/fault.c | 21 +++++++++++---------- include/linux/highmem.h | 6 ++++-- mm/page_alloc.c | 9 ++------- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 2312e6ee595f..258cca4b4873 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,8 +33,8 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, unsigned long vaddr); #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio -void tag_clear_highpage(struct page *to); -#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE +bool tag_clear_highpages(struct page *to, int numpages); +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES #define clear_user_page(page, vaddr, pg) clear_page(page) #define copy_user_page(to, from, vaddr, pg) copy_page(to, from) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 125dfa6c613b..a193b6a5d1e6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -967,20 +967,21 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma, return vma_alloc_folio(flags, 0, vma, vaddr); } -void tag_clear_highpage(struct page *page) +bool tag_clear_highpages(struct page *page, int numpages) { /* * Check if MTE is supported and fall back to clear_highpage(). * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and - * post_alloc_hook() will invoke tag_clear_highpage(). + * post_alloc_hook() will invoke tag_clear_highpages(). */ - if (!system_supports_mte()) { - clear_highpage(page); - return; - } + if (!system_supports_mte()) + return false; - /* Newly allocated page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(page)); - mte_zero_clear_page_tags(page_address(page)); - set_page_mte_tagged(page); + /* Newly allocated pages, shouldn't have been tagged yet */ + for (int i = 0; i < numpages; i++, page++) { + WARN_ON_ONCE(!try_page_mte_tagging(page)); + mte_zero_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } + return true; } diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 105cc4c00cc3..abc20f9810fd 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -249,10 +249,12 @@ static inline void clear_highpage_kasan_tagged(struct page *page) kunmap_local(kaddr); } -#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE +#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES -static inline void tag_clear_highpage(struct page *page) +/* Return false to let people know we did not initialize the pages */ +static inline bool tag_clear_highpages(struct page *page, int numpages) { + return false; } #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 600d9e981c23..ed82ee55e66a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1822,14 +1822,9 @@ inline void post_alloc_hook(struct page *page, unsigned int order, * If memory tags should be zeroed * (which happens only when memory should be initialized as well). */ - if (zero_tags) { - /* Initialize both memory and memory tags. */ - for (i = 0; i != 1 << order; ++i) - tag_clear_highpage(page + i); + if (zero_tags) + init = !tag_clear_highpages(page, 1 << order); - /* Take note that memory was initialized by the loop above. */ - init = false; - } if (!should_skip_kasan_unpoison(gfp_flags) && kasan_unpoison_pages(page, order, init)) { /* Take note that memory was initialized by KASAN. */ From 3fa05f96fc08dff5e846c2cc283a249c1bf029a1 Mon Sep 17 00:00:00 2001 From: Yosry Ahmed Date: Wed, 12 Nov 2025 01:30:17 +0000 Subject: [PATCH 1065/1075] KVM: SVM: Fix redundant updates of LBR MSR intercepts Don't update the LBR MSR intercept bitmaps if they're already up-to-date, as unconditionally updating the intercepts forces KVM to recalculate the MSR bitmaps for vmcb02 on every nested VMRUN. The redundant updates are functionally okay; however, they neuter an optimization in Hyper-V nested virtualization enlightenments and this manifests as a self-test failure. In particular, Hyper-V lets L1 mark "nested enlightenments" as clean, i.e. tell KVM that no changes were made to the MSR bitmap since the last VMRUN. The hyperv_svm_test KVM selftest intentionally changes the MSR bitmap "without telling KVM about it" to verify that KVM honors the clean hint, correctly fails because KVM notices the changed bitmap anyway: ==== Test Assertion Failure ==== x86/hyperv_svm_test.c:120: vmcb->control.exit_code == 0x081 pid=193558 tid=193558 errno=4 - Interrupted system call 1 0x0000000000411361: assert_on_unhandled_exception at processor.c:659 2 0x0000000000406186: _vcpu_run at kvm_util.c:1699 3 (inlined by) vcpu_run at kvm_util.c:1710 4 0x0000000000401f2a: main at hyperv_svm_test.c:175 5 0x000000000041d0d3: __libc_start_call_main at libc-start.o:? 6 0x000000000041f27c: __libc_start_main_impl at ??:? 7 0x00000000004021a0: _start at ??:? vmcb->control.exit_code == SVM_EXIT_VMMCALL Do *not* fix this by skipping svm_hv_vmcb_dirty_nested_enlightenments() when svm_set_intercept_for_msr() performs a no-op change. changes to the L0 MSR interception bitmap are only triggered by full CPUID updates and MSR filter updates, both of which should be rare. Changing svm_set_intercept_for_msr() risks hiding unintended pessimizations like this one, and is actually more complex than this change. Fixes: fbe5e5f030c2 ("KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()") Cc: stable@vger.kernel.org Signed-off-by: Yosry Ahmed Link: https://patch.msgid.link/20251112013017.1836863-1-yosry.ahmed@linux.dev [Rewritten commit message based on mailing list discussion. - Paolo] Reviewed-by: Sean Christopherson Tested-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 9 ++++++++- arch/x86/kvm/svm/svm.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 10c21e4c5406..9d29b2e7e855 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -705,7 +705,11 @@ void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask) static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) { - bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + struct vcpu_svm *svm = to_svm(vcpu); + bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK); + + if (intercept == svm->lbr_msrs_intercepted) + return; svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept); svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept); @@ -714,6 +718,8 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu) if (sev_es_guest(vcpu->kvm)) svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept); + + svm->lbr_msrs_intercepted = intercept; } void svm_vcpu_free_msrpm(void *msrpm) @@ -1221,6 +1227,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) } svm->x2avic_msrs_intercepted = true; + svm->lbr_msrs_intercepted = true; svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index c856d8e0f95e..dd78e6402345 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -336,6 +336,7 @@ struct vcpu_svm { bool guest_state_loaded; bool x2avic_msrs_intercepted; + bool lbr_msrs_intercepted; /* Guest GIF value, used when vGIF is not enabled */ bool guest_gif; From 501ea61e2933ee61933ff2f8dd2844e154dd2746 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:22 +0100 Subject: [PATCH 1066/1075] HID: bpf: Add support for the Inspiroy 2M There are a differences in the report descriptor to the existing Inspiroy 2S which makes having this as separate file a more efficient approach than merging them together. Signed-off-by: Peter Hutterer Signed-off-by: Benjamin Tissoires Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/167 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../hid/bpf/progs/Huion__Inspiroy-2-M.bpf.c | 563 ++++++++++++++++++ 1 file changed, 563 insertions(+) create mode 100644 drivers/hid/bpf/progs/Huion__Inspiroy-2-M.bpf.c diff --git a/drivers/hid/bpf/progs/Huion__Inspiroy-2-M.bpf.c b/drivers/hid/bpf/progs/Huion__Inspiroy-2-M.bpf.c new file mode 100644 index 000000000000..183d408d893a --- /dev/null +++ b/drivers/hid/bpf/progs/Huion__Inspiroy-2-M.bpf.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2024 Red Hat, Inc + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include "hid_report_helpers.h" +#include + +#define VID_HUION 0x256C +#define PID_INSPIROY_2_M 0x0067 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, VID_HUION, PID_INSPIROY_2_M), +); + +/* Filled in by udev-hid-bpf */ +char UDEV_PROP_HUION_FIRMWARE_ID[64]; + +/* The prefix of the firmware ID we expect for this device. The full firmware + * string has a date suffix, e.g. HUION_T21j_221221 + */ +char EXPECTED_FIRMWARE_ID[] = "HUION_T21k_"; + +/* How this BPF program works: the tablet has two modes, firmware mode and + * tablet mode. In firmware mode (out of the box) the tablet sends button events + * and the dial as keyboard combinations. In tablet mode it uses a vendor specific + * hid report to report everything instead. + * Depending on the mode some hid reports are never sent and the corresponding + * devices are mute. + * + * To switch the tablet use e.g. https://github.com/whot/huion-switcher + * or one of the tools from the digimend project + * + * This BPF works for both modes. The huion-switcher tool sets the + * HUION_FIRMWARE_ID udev property - if that is set then we disable the firmware + * pad and pen reports (by making them vendor collections that are ignored). + * If that property is not set we fix all hidraw nodes so the tablet works in + * either mode though the drawback is that the device will show up twice if + * you bind it to all event nodes + * + * Default report descriptor for the first exposed hidraw node: + * + * # HUION Huion Tablet_H641P + * # Report descriptor length: 18 bytes + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 0xFF00) 0 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 3 + * # 0xa1, 0x01, // Collection (Application) 5 + * # 0x85, 0x08, // Report ID (8) 7 + * # 0x75, 0x58, // Report Size (88) 9 + * # 0x95, 0x01, // Report Count (1) 11 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 13 + * # 0x81, 0x02, // Input (Data,Var,Abs) 15 + * # 0xc0, // End Collection 17 + * R: 18 06 00 ff 09 01 a1 01 85 08 75 58 95 01 09 01 81 02 c0 + * + * This rdesc does nothing until the tablet is switched to raw mode, see + * https://github.com/whot/huion-switcher + * + * + * Second hidraw node is the Pen. This one sends events until the tablet is + * switched to raw mode, then it's mute. + * + * # Report descriptor length: 93 bytes + * # 0x05, 0x0d, // Usage Page (Digitizers) 0 + * # 0x09, 0x02, // Usage (Pen) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # 0x85, 0x0a, // Report ID (10) 6 + * # 0x09, 0x20, // Usage (Stylus) 8 + * # 0xa1, 0x01, // Collection (Application) 10 + * # 0x09, 0x42, // Usage (Tip Switch) 12 + * # 0x09, 0x44, // Usage (Barrel Switch) 14 + * # 0x09, 0x45, // Usage (Eraser) 16 + * # 0x09, 0x3c, // Usage (Invert) 18 <-- has no Invert eraser + * # 0x15, 0x00, // Logical Minimum (0) 20 + * # 0x25, 0x01, // Logical Maximum (1) 22 + * # 0x75, 0x01, // Report Size (1) 24 + * # 0x95, 0x06, // Report Count (6) 26 + * # 0x81, 0x02, // Input (Data,Var,Abs) 28 + * # 0x09, 0x32, // Usage (In Range) 30 + * # 0x75, 0x01, // Report Size (1) 32 + * # 0x95, 0x01, // Report Count (1) 34 + * # 0x81, 0x02, // Input (Data,Var,Abs) 36 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 38 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 40 + * # 0x09, 0x30, // Usage (X) 42 + * # 0x09, 0x31, // Usage (Y) 44 + * # 0x55, 0x0d, // Unit Exponent (-3) 46 <-- change to -2 + * # 0x65, 0x33, // Unit (EnglishLinear: in³) 48 <-- change in³ to in + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 50 + * # 0x35, 0x00, // Physical Minimum (0) 53 + * # 0x46, 0x00, 0x08, // Physical Maximum (2048) 55 <-- invalid size + * # 0x75, 0x10, // Report Size (16) 58 + * # 0x95, 0x02, // Report Count (2) 60 + * # 0x81, 0x02, // Input (Data,Var,Abs) 62 + * # 0x05, 0x0d, // Usage Page (Digitizers) 64 + * # 0x09, 0x30, // Usage (Tip Pressure) 66 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 68 + * # 0x75, 0x10, // Report Size (16) 71 + * # 0x95, 0x01, // Report Count (1) 73 + * # 0x81, 0x02, // Input (Data,Var,Abs) 75 + * # 0x09, 0x3d, // Usage (X Tilt) 77 <-- No tilt reported + * # 0x09, 0x3e, // Usage (Y Tilt) 79 + * # 0x15, 0x81, // Logical Minimum (-127) 81 + * # 0x25, 0x7f, // Logical Maximum (127) 83 + * # 0x75, 0x08, // Report Size (8) 85 + * # 0x95, 0x02, // Report Count (2) 87 + * # 0x81, 0x02, // Input (Data,Var,Abs) 89 + * # 0xc0, // End Collection 91 + * # 0xc0, // End Collection 92 + * R: 93 05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 45 09 3c 15 00 25 01 7501 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 09 3d09 3e 15 81 25 7f 75 08 95 02 81 02 c0 c0 + * + * Third hidraw node is the pad which sends a combination of keyboard shortcuts until + * the tablet is switched to raw mode, then it's mute: + * + * # Report descriptor length: 65 bytes + * # 0x05, 0x01, // Usage Page (Generic Desktop) 0 + * # 0x09, 0x06, // Usage (Keyboard) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # 0x85, 0x03, // Report ID (3) 6 + * # 0x05, 0x07, // Usage Page (Keyboard/Keypad) 8 + * # 0x19, 0xe0, // UsageMinimum (224) 10 + * # 0x29, 0xe7, // UsageMaximum (231) 12 + * # 0x15, 0x00, // Logical Minimum (0) 14 + * # 0x25, 0x01, // Logical Maximum (1) 16 + * # 0x75, 0x01, // Report Size (1) 18 + * # 0x95, 0x08, // Report Count (8) 20 + * # 0x81, 0x02, // Input (Data,Var,Abs) 22 + * # 0x05, 0x07, // Usage Page (Keyboard/Keypad) 24 + * # 0x19, 0x00, // UsageMinimum (0) 26 + * # 0x29, 0xff, // UsageMaximum (255) 28 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 30 + * # 0x75, 0x08, // Report Size (8) 33 + * # 0x95, 0x06, // Report Count (6) 35 + * # 0x81, 0x00, // Input (Data,Arr,Abs) 37 + * # 0xc0, // End Collection 39 + * # 0x05, 0x0c, // Usage Page (Consumer) 40 + * # 0x09, 0x01, // Usage (Consumer Control) 42 + * # 0xa1, 0x01, // Collection (Application) 44 + * # 0x85, 0x04, // Report ID (4) 46 + * # 0x19, 0x00, // UsageMinimum (0) 48 + * # 0x2a, 0x3c, 0x02, // UsageMaximum (572) 50 + * # 0x15, 0x00, // Logical Minimum (0) 53 + * # 0x26, 0x3c, 0x02, // Logical Maximum (572) 55 + * # 0x95, 0x01, // Report Count (1) 58 + * # 0x75, 0x10, // Report Size (16) 60 + * # 0x81, 0x00, // Input (Data,Arr,Abs) 62 + * # 0xc0, // End Collection 64 + * R: 65 05 01 09 06 a1 01 85 03 05 07 19 e0 29 e7 15 00 25 01 75 01 95 08 81 02 0507 19 00 29 ff 26 ff 00 75 08 95 06 81 00 c0 05 0c 09 01 a1 01 85 04 19 00 2a 3c02 15 00 26 3c 02 95 01 75 10 81 00 c0 + * N: HUION Huion Tablet_H641P + */ + +#define PAD_REPORT_DESCRIPTOR_LENGTH 133 +#define PEN_REPORT_DESCRIPTOR_LENGTH 93 +#define VENDOR_REPORT_DESCRIPTOR_LENGTH 36 +#define PAD_REPORT_ID 3 +#define PEN_REPORT_ID 10 +#define VENDOR_REPORT_ID 8 +#define PAD_REPORT_LENGTH 8 +#define PEN_REPORT_LENGTH 10 +#define VENDOR_REPORT_LENGTH 12 + + +__u16 last_button_state; + +static const __u8 fixed_rdesc_pad[] = { + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + // -- Byte 0 in report + ReportId(PAD_REPORT_ID) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + // Byte 1 in report - just exists so we get to be a tablet pad + Usage_Dig_BarrelSwitch // BTN_STYLUS + ReportCount(1) + ReportSize(1) + Input(Var|Abs) + ReportCount(7) // padding + Input(Const) + // Bytes 2/3 in report - just exists so we get to be a tablet pad + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + ReportCount(2) + ReportSize(8) + Input(Var|Abs) + // Byte 4 in report is the wheel + Usage_GD_Wheel + LogicalMinimum_i8(-1) + LogicalMaximum_i8(1) + ReportCount(1) + ReportSize(8) + Input(Var|Rel) + // Byte 5 is the button state + UsagePage_Button + UsageMinimum_i8(0x1) + UsageMaximum_i8(0x8) + LogicalMinimum_i8(0x1) + LogicalMaximum_i8(0x8) + ReportCount(1) + ReportSize(8) + Input(Arr|Abs) + ) + // Make sure we match our original report length + FixedSizeVendorReport(PAD_REPORT_LENGTH) + ) +}; + +static const __u8 fixed_rdesc_pen[] = { + UsagePage_Digitizers + Usage_Dig_Pen + CollectionApplication( + // -- Byte 0 in report + ReportId(PEN_REPORT_ID) + Usage_Dig_Pen + CollectionPhysical( + // -- Byte 1 in report + Usage_Dig_TipSwitch + Usage_Dig_BarrelSwitch + Usage_Dig_SecondaryBarrelSwitch // maps eraser to BTN_STYLUS2 + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + ReportSize(1) + ReportCount(3) + Input(Var|Abs) + ReportCount(4) // Padding + Input(Const) + Usage_Dig_InRange + ReportCount(1) + Input(Var|Abs) + ReportSize(16) + ReportCount(1) + PushPop( + UsagePage_GenericDesktop + Unit(cm) + UnitExponent(-1) + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(160) + LogicalMinimum_i16(0) + LogicalMaximum_i16(32767) + Usage_GD_X + Input(Var|Abs) // Bytes 2+3 + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(100) + LogicalMinimum_i16(0) + LogicalMaximum_i16(32767) + Usage_GD_Y + Input(Var|Abs) // Bytes 4+5 + ) + UsagePage_Digitizers + Usage_Dig_TipPressure + LogicalMinimum_i16(0) + LogicalMaximum_i16(8191) + Input(Var|Abs) // Byte 6+7 + // Two bytes padding so we don't need to change the report at all + ReportSize(8) + ReportCount(2) + Input(Const) // Byte 6+7 + ) + ) +}; + +static const __u8 fixed_rdesc_vendor[] = { + UsagePage_Digitizers + Usage_Dig_Pen + CollectionApplication( + // Byte 0 + // We leave the pen on the vendor report ID + ReportId(VENDOR_REPORT_ID) + Usage_Dig_Pen + CollectionPhysical( + // Byte 1 are the buttons + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + ReportSize(1) + Usage_Dig_TipSwitch + Usage_Dig_BarrelSwitch + Usage_Dig_SecondaryBarrelSwitch + ReportCount(3) + Input(Var|Abs) + ReportCount(4) // Padding + Input(Const) + Usage_Dig_InRange + ReportCount(1) + Input(Var|Abs) + ReportSize(16) + ReportCount(1) + PushPop( + UsagePage_GenericDesktop + Unit(cm) + UnitExponent(-1) + // Note: reported logical range differs + // from the pen report ID for x and y + LogicalMinimum_i16(0) + LogicalMaximum_i16(32000) + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(160) + // Bytes 2/3 in report + Usage_GD_X + Input(Var|Abs) + LogicalMinimum_i16(0) + LogicalMaximum_i16(20000) + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(100) + // Bytes 4/5 in report + Usage_GD_Y + Input(Var|Abs) + ) + // Bytes 6/7 in report + LogicalMinimum_i16(0) + LogicalMaximum_i16(8192) + Usage_Dig_TipPressure + Input(Var|Abs) + ) + ) + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + // Byte 0 + ReportId(PAD_REPORT_ID) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + // Byte 1 are the buttons + Usage_Dig_BarrelSwitch // BTN_STYLUS, needed so we get to be a tablet pad + ReportCount(1) + ReportSize(1) + Input(Var|Abs) + ReportCount(7) // Padding + Input(Const) + // Bytes 2/3 - x/y just exist so we get to be a tablet pad + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + ReportCount(2) + ReportSize(8) + Input(Var|Abs) + // Bytes 4 and 5 are the button state + UsagePage_Button + UsageMinimum_i8(0x1) + UsageMaximum_i8(0xa) + LogicalMinimum_i8(0x0) + LogicalMaximum_i8(0x1) + ReportCount(10) + ReportSize(1) + Input(Var|Abs) + Usage_i8(0x31) // maps to BTN_SOUTH + ReportCount(1) + Input(Var|Abs) + ReportCount(5) + Input(Const) + // Byte 6 is the wheel + UsagePage_GenericDesktop + Usage_GD_Wheel + LogicalMinimum_i8(-1) + LogicalMaximum_i8(1) + ReportCount(1) + ReportSize(8) + Input(Var|Rel) + ) + // Make sure we match our original report length + FixedSizeVendorReport(VENDOR_REPORT_LENGTH) + ) +}; + +static const __u8 disabled_rdesc_pen[] = { + FixedSizeVendorReport(PEN_REPORT_LENGTH) +}; + +static const __u8 disabled_rdesc_pad[] = { + FixedSizeVendorReport(PAD_REPORT_LENGTH) +}; + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(hid_fix_rdesc, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, HID_MAX_DESCRIPTOR_SIZE /* size */); + __s32 rdesc_size = hctx->size; + __u8 have_fw_id; + + if (!data) + return 0; /* EPERM check */ + + /* If we have a firmware ID and it matches our expected prefix, we + * disable the default pad/pen nodes. They won't send events + * but cause duplicate devices. + */ + have_fw_id = __builtin_memcmp(UDEV_PROP_HUION_FIRMWARE_ID, + EXPECTED_FIRMWARE_ID, + sizeof(EXPECTED_FIRMWARE_ID) - 1) == 0; + if (rdesc_size == PAD_REPORT_DESCRIPTOR_LENGTH) { + if (have_fw_id) { + __builtin_memcpy(data, disabled_rdesc_pad, sizeof(disabled_rdesc_pad)); + return sizeof(disabled_rdesc_pad); + } + + __builtin_memcpy(data, fixed_rdesc_pad, sizeof(fixed_rdesc_pad)); + return sizeof(fixed_rdesc_pad); + } + if (rdesc_size == PEN_REPORT_DESCRIPTOR_LENGTH) { + if (have_fw_id) { + __builtin_memcpy(data, disabled_rdesc_pen, sizeof(disabled_rdesc_pen)); + return sizeof(disabled_rdesc_pen); + } + + __builtin_memcpy(data, fixed_rdesc_pen, sizeof(fixed_rdesc_pen)); + return sizeof(fixed_rdesc_pen); + } + /* Always fix the vendor mode so the tablet will work even if nothing sets + * the udev property (e.g. huion-switcher run manually) + */ + if (rdesc_size == VENDOR_REPORT_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, fixed_rdesc_vendor, sizeof(fixed_rdesc_vendor)); + return sizeof(fixed_rdesc_vendor); + } + return 0; +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(inspiroy_2_fix_events, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 10 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* Only sent if tablet is in default mode */ + if (data[0] == PAD_REPORT_ID) { + /* Nicely enough, this device only supports one button down at a time so + * the reports are easy to match. Buttons numbered from the top + * Button released: 03 00 00 00 00 00 00 00 + * Button 1: 03 00 05 00 00 00 00 00 -> b + * Button 2: 03 07 11 00 00 00 00 00 -> Ctrl Shift N + * Button 3: 03 00 08 00 00 00 00 00 -> e + * Button 4: 03 00 0c 00 00 00 00 00 -> i + * Button 5: 03 00 2c 00 00 00 00 00 -> space + * Button 6: 03 01 08 00 00 00 00 00 -> Ctrl E + * Button 7: 03 01 16 00 00 00 00 00 -> Ctrl S + * Button 8: 03 05 1d 00 00 00 00 00 -> Ctrl Alt Z + * + * Wheel down: 03 01 2d 00 00 00 00 00 -> Ctrl - + * Wheel up: 03 01 2e 00 00 00 00 00 -> Ctrl = + */ + __u8 button = 0; + __u8 wheel = 0; + + switch (data[1] << 8 | data[2]) { + case 0x0000: + break; + case 0x0005: + button = 1; + break; + case 0x0711: + button = 2; + break; + case 0x0008: + button = 3; + break; + case 0x000c: + button = 4; + break; + case 0x002c: + button = 5; + break; + case 0x0108: + button = 6; + break; + case 0x0116: + button = 7; + break; + case 0x051d: + button = 8; + break; + case 0x012d: + wheel = -1; + break; + case 0x012e: + wheel = 1; + break; + } + + __u8 report[6] = {PAD_REPORT_ID, 0x0, 0x0, 0x0, wheel, button}; + + __builtin_memcpy(data, report, sizeof(report)); + return sizeof(report); + } + + /* Nothing to do for the PEN_REPORT_ID, it's already mapped */ + + /* Only sent if tablet is in raw mode */ + if (data[0] == VENDOR_REPORT_ID) { + /* Pad reports */ + if (data[1] & 0x20) { + /* See fixed_rdesc_pad */ + struct pad_report { + __u8 report_id; + __u8 btn_stylus; + __u8 x; + __u8 y; + __u16 buttons; + __u8 wheel; + } __attribute__((packed)) *pad_report; + __u8 wheel = 0; + + /* Wheel report */ + if (data[1] == 0xf1) { + if (data[5] == 2) + wheel = 0xff; + else + wheel = data[5]; + } else { + /* data[4] and data[5] are the buttons, mapped correctly */ + last_button_state = data[4] | (data[5] << 8); + wheel = 0; // wheel + } + + pad_report = (struct pad_report *)data; + + pad_report->report_id = PAD_REPORT_ID; + pad_report->btn_stylus = 0; + pad_report->x = 0; + pad_report->y = 0; + pad_report->buttons = last_button_state; + pad_report->wheel = wheel; + + return sizeof(struct pad_report); + } + + /* Pen reports need nothing done */ + } + + return 0; +} + +HID_BPF_OPS(inspiroy_2) = { + .hid_device_event = (void *)inspiroy_2_fix_events, + .hid_rdesc_fixup = (void *)hid_fix_rdesc, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + switch (ctx->rdesc_size) { + case PAD_REPORT_DESCRIPTOR_LENGTH: + case PEN_REPORT_DESCRIPTOR_LENGTH: + case VENDOR_REPORT_DESCRIPTOR_LENGTH: + ctx->retval = 0; + break; + default: + ctx->retval = -EINVAL; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 5f2e058df65cac37aeeeb74fef650edd51fe1882 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:23 +0100 Subject: [PATCH 1067/1075] HID: bpf: add support for Huion Kamvas 13 (Gen 3) (model GS1333) This assumes that the tablet has been switched into vendor mode (by using huion-switcher[1], for example) and is sending events using Huion's proprietary data format. This has been tested using the PW600L pen, which does not have an eraser. There is no expectation that a pen with an eraser will work at this time. [1] https://github.com/whot/huion-switcher Signed-off-by: Nicholas LaPointe Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/162 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../hid/bpf/progs/Huion__Kamvas13Gen3.bpf.c | 1395 +++++++++++++++++ 1 file changed, 1395 insertions(+) create mode 100644 drivers/hid/bpf/progs/Huion__Kamvas13Gen3.bpf.c diff --git a/drivers/hid/bpf/progs/Huion__Kamvas13Gen3.bpf.c b/drivers/hid/bpf/progs/Huion__Kamvas13Gen3.bpf.c new file mode 100644 index 000000000000..b63f9a48ea45 --- /dev/null +++ b/drivers/hid/bpf/progs/Huion__Kamvas13Gen3.bpf.c @@ -0,0 +1,1395 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Nicholas LaPointe + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include "hid_report_helpers.h" +#include + +#define VID_HUION 0x256c +#define PID_KAMVAS13_GEN3 0x2008 + +#define VENDOR_DESCRIPTOR_LENGTH 36 +#define TABLET_DESCRIPTOR_LENGTH 368 +#define WHEEL_DESCRIPTOR_LENGTH 108 + +#define VENDOR_REPORT_ID 8 +#define VENDOR_REPORT_LENGTH 14 + +#define VENDOR_REPORT_SUBTYPE_PEN 0x08 +#define VENDOR_REPORT_SUBTYPE_PEN_OUT 0x00 +#define VENDOR_REPORT_SUBTYPE_BUTTONS 0x0e +#define VENDOR_REPORT_SUBTYPE_WHEELS 0x0f + +/* For the reports that we create ourselves */ +#define CUSTOM_PAD_REPORT_ID 9 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_ANY, VID_HUION, PID_KAMVAS13_GEN3), +); + + +/* + * This tablet can send reports using one of two different data formats, + * depending on what "mode" the tablet is in. + * + * By default, the tablet will send reports that can be decoded using its + * included HID descriptors (descriptors 1 and 2, shown below). + * This mode will be called "firmware mode" throughout this file. + * + * The HID descriptor that describes pen events in firmware mode (descriptor 1) + * has multiple bugs: + * * "Secondary Tip Switch" instead of "Secondary Barrel Switch" + * * "Invert" instead of (or potentially shared with) third barrel button + * * Specified tablet area of 2048 in³ instead of 293.8 x 165.2mm + * * Specified tilt range of -90 to +90 instead of -60 to +60 + * + * While these can be easily patched up by editing the descriptor, a larger + * problem with the firmware mode exists: it is impossible to tell which of the + * two wheels are being rotated (or having their central button pressed). + * + * + * By using a tool such as huion-switcher (https://github.com/whot/huion-switcher), + * the tablet can be made to send reports using a proprietary format that is not + * adequately described by its relevant descriptor (descriptor 0, shown below). + * This mode will be called "vendor mode" throughout this file. + * + * The reports sent while in vendor mode allow for proper decoding of the wheels. + * + * For simplicity and maximum functionality, this BPF focuses strictly on + * enabling one to make use of the vendor mode. + */ + +/* + * DESCRIPTORS + * DESCRIPTOR 0 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page FF00) 0 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 3 + * # 0xa1, 0x01, // Collection (Application) 5 + * # ┅ 0x85, 0x08, // Report ID (8) 7 + * # 0x75, 0x68, // Report Size (104) 9 + * # 0x95, 0x01, // Report Count (1) 11 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 13 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 15 + * # 0xc0, // End Collection 17 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page FF00) 18 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 21 + * # 0xa1, 0x01, // Collection (Application) 23 + * # ┅ 0x85, 0x16, // Report ID (22) 25 + * # 0x75, 0x08, // Report Size (8) 27 + * # 0x95, 0x07, // Report Count (7) 29 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 31 + * # ║ 0xb1, 0x02, // Feature (Data,Var,Abs) 33 + * # 0xc0, // End Collection 35 + * R: 36 06 00 ff 09 01 a1 01 85 08 75 68 95 01 09 01 81 02 c0 06 00 ff 09 01 a1 01 85 16 75 08 95 07 09 01 b1 02 c0 + * N: HUION Huion Tablet_GS1333 + * I: 3 256c 2008 + * + * DESCRIPTOR 1 + * # 0x05, 0x0d, // Usage Page (Digitizers) 0 + * # 0x09, 0x02, // Usage (Pen) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # ┅ 0x85, 0x0a, // Report ID (10) 6 + * # 0x09, 0x20, // Usage (Stylus) 8 + * # 0xa1, 0x01, // Collection (Application) 10 + * # 0x09, 0x42, // Usage (Tip Switch) 12 + * # 0x09, 0x44, // Usage (Barrel Switch) 14 + * # 0x09, 0x43, // Usage (Secondary Tip Switch) 16 + * # 0x09, 0x3c, // Usage (Invert) 18 + * # 0x09, 0x45, // Usage (Eraser) 20 + * # 0x15, 0x00, // Logical Minimum (0) 22 + * # 0x25, 0x01, // Logical Maximum (1) 24 + * # 0x75, 0x01, // Report Size (1) 26 + * # 0x95, 0x06, // Report Count (6) 28 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 30 + * # 0x09, 0x32, // Usage (In Range) 32 + * # 0x75, 0x01, // Report Size (1) 34 + * # 0x95, 0x01, // Report Count (1) 36 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 38 + * # ┇ 0x81, 0x03, // Input (Cnst,Var,Abs) 40 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 42 + * # 0x09, 0x30, // Usage (X) 44 + * # 0x09, 0x31, // Usage (Y) 46 + * # 0x55, 0x0d, // Unit Exponent (-3) 48 + * # 0x65, 0x33, // Unit (EnglishLinear: in³) 50 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 52 + * # 0x35, 0x00, // Physical Minimum (0) 55 + * # 0x46, 0x00, 0x08, // Physical Maximum (2048) 57 + * # 0x75, 0x10, // Report Size (16) 60 + * # 0x95, 0x02, // Report Count (2) 62 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 64 + * # 0x05, 0x0d, // Usage Page (Digitizers) 66 + * # 0x09, 0x30, // Usage (Tip Pressure) 68 + * # 0x26, 0xff, 0x3f, // Logical Maximum (16383) 70 + * # 0x75, 0x10, // Report Size (16) 73 + * # 0x95, 0x01, // Report Count (1) 75 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 77 + * # 0x09, 0x3d, // Usage (X Tilt) 79 + * # 0x09, 0x3e, // Usage (Y Tilt) 81 + * # 0x15, 0xa6, // Logical Minimum (-90) 83 + * # 0x25, 0x5a, // Logical Maximum (90) 85 + * # 0x75, 0x08, // Report Size (8) 87 + * # 0x95, 0x02, // Report Count (2) 89 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 91 + * # 0xc0, // End Collection 93 + * # 0xc0, // End Collection 94 + * # 0x05, 0x0d, // Usage Page (Digitizers) 95 + * # 0x09, 0x04, // Usage (Touch Screen) 97 + * # 0xa1, 0x01, // Collection (Application) 99 + * # ┅ 0x85, 0x04, // Report ID (4) 101 + * # 0x09, 0x22, // Usage (Finger) 103 + * # 0xa1, 0x02, // Collection (Logical) 105 + * # 0x05, 0x0d, // Usage Page (Digitizers) 107 + * # 0x95, 0x01, // Report Count (1) 109 + * # 0x75, 0x06, // Report Size (6) 111 + * # 0x09, 0x51, // Usage (Contact Identifier) 113 + * # 0x15, 0x00, // Logical Minimum (0) 115 + * # 0x25, 0x3f, // Logical Maximum (63) 117 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 119 + * # 0x09, 0x42, // Usage (Tip Switch) 121 + * # 0x25, 0x01, // Logical Maximum (1) 123 + * # 0x75, 0x01, // Report Size (1) 125 + * # 0x95, 0x01, // Report Count (1) 127 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 129 + * # 0x75, 0x01, // Report Size (1) 131 + * # 0x95, 0x01, // Report Count (1) 133 + * # ┇ 0x81, 0x03, // Input (Cnst,Var,Abs) 135 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 137 + * # 0x75, 0x10, // Report Size (16) 139 + * # 0x55, 0x0e, // Unit Exponent (-2) 141 + * # 0x65, 0x11, // Unit (SILinear: cm) 143 + * # 0x09, 0x30, // Usage (X) 145 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 147 + * # 0x35, 0x00, // Physical Minimum (0) 150 + * # 0x46, 0x15, 0x0c, // Physical Maximum (3093) 152 + * # ┇ 0x81, 0x42, // Input (Data,Var,Abs,Null) 155 + * # 0x09, 0x31, // Usage (Y) 157 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 159 + * # 0x46, 0xcb, 0x06, // Physical Maximum (1739) 162 + * # ┇ 0x81, 0x42, // Input (Data,Var,Abs,Null) 165 + * # 0x05, 0x0d, // Usage Page (Digitizers) 167 + * # 0x09, 0x30, // Usage (Tip Pressure) 169 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 171 + * # 0x75, 0x10, // Report Size (16) 174 + * # 0x95, 0x01, // Report Count (1) 176 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 178 + * # 0xc0, // End Collection 180 + * # 0x05, 0x0d, // Usage Page (Digitizers) 181 + * # 0x09, 0x22, // Usage (Finger) 183 + * # 0xa1, 0x02, // Collection (Logical) 185 + * # 0x05, 0x0d, // Usage Page (Digitizers) 187 + * # 0x95, 0x01, // Report Count (1) 189 + * # 0x75, 0x06, // Report Size (6) 191 + * # 0x09, 0x51, // Usage (Contact Identifier) 193 + * # 0x15, 0x00, // Logical Minimum (0) 195 + * # 0x25, 0x3f, // Logical Maximum (63) 197 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 199 + * # 0x09, 0x42, // Usage (Tip Switch) 201 + * # 0x25, 0x01, // Logical Maximum (1) 203 + * # 0x75, 0x01, // Report Size (1) 205 + * # 0x95, 0x01, // Report Count (1) 207 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 209 + * # 0x75, 0x01, // Report Size (1) 211 + * # 0x95, 0x01, // Report Count (1) 213 + * # ┇ 0x81, 0x03, // Input (Cnst,Var,Abs) 215 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 217 + * # 0x75, 0x10, // Report Size (16) 219 + * # 0x55, 0x0e, // Unit Exponent (-2) 221 + * # 0x65, 0x11, // Unit (SILinear: cm) 223 + * # 0x09, 0x30, // Usage (X) 225 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 227 + * # 0x35, 0x00, // Physical Minimum (0) 230 + * # 0x46, 0x15, 0x0c, // Physical Maximum (3093) 232 + * # ┇ 0x81, 0x42, // Input (Data,Var,Abs,Null) 235 + * # 0x09, 0x31, // Usage (Y) 237 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 239 + * # 0x46, 0xcb, 0x06, // Physical Maximum (1739) 242 + * # ┇ 0x81, 0x42, // Input (Data,Var,Abs,Null) 245 + * # 0x05, 0x0d, // Usage Page (Digitizers) 247 + * # 0x09, 0x30, // Usage (Tip Pressure) 249 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 251 + * # 0x75, 0x10, // Report Size (16) 254 + * # 0x95, 0x01, // Report Count (1) 256 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 258 + * # 0xc0, // End Collection 260 + * # 0x05, 0x0d, // Usage Page (Digitizers) 261 + * # 0x09, 0x56, // Usage (Scan Time) 263 + * # 0x55, 0x00, // Unit Exponent (0) 265 + * # 0x65, 0x00, // Unit (None) 267 + * # 0x27, 0xff, 0xff, 0xff, 0x7f, // Logical Maximum (2147483647) 269 + * # 0x95, 0x01, // Report Count (1) 274 + * # 0x75, 0x20, // Report Size (32) 276 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 278 + * # 0x09, 0x54, // Usage (Contact Count) 280 + * # 0x25, 0x7f, // Logical Maximum (127) 282 + * # 0x95, 0x01, // Report Count (1) 284 + * # 0x75, 0x08, // Report Size (8) 286 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 288 + * # 0x75, 0x08, // Report Size (8) 290 + * # 0x95, 0x08, // Report Count (8) 292 + * # ┇ 0x81, 0x03, // Input (Cnst,Var,Abs) 294 + * # ┅ 0x85, 0x05, // Report ID (5) 296 + * # 0x09, 0x55, // Usage (Contact Count Maximum) 298 + * # 0x25, 0x0a, // Logical Maximum (10) 300 + * # 0x75, 0x08, // Report Size (8) 302 + * # 0x95, 0x01, // Report Count (1) 304 + * # ║ 0xb1, 0x02, // Feature (Data,Var,Abs) 306 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page FF00) 308 + * # 0x09, 0xc5, // Usage (Vendor Usage 0xc5) 311 + * # ┅ 0x85, 0x06, // Report ID (6) 313 + * # 0x15, 0x00, // Logical Minimum (0) 315 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 317 + * # 0x75, 0x08, // Report Size (8) 320 + * # 0x96, 0x00, 0x01, // Report Count (256) 322 + * # ║ 0xb1, 0x02, // Feature (Data,Var,Abs) 325 + * # 0xc0, // End Collection 327 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 328 + * # 0x09, 0x06, // Usage (Keyboard) 330 + * # 0xa1, 0x01, // Collection (Application) 332 + * # ┅ 0x85, 0x03, // Report ID (3) 334 + * # 0x05, 0x07, // Usage Page (Keyboard/Keypad) 336 + * # 0x19, 0xe0, // UsageMinimum (224) 338 + * # 0x29, 0xe7, // UsageMaximum (231) 340 + * # 0x15, 0x00, // Logical Minimum (0) 342 + * # 0x25, 0x01, // Logical Maximum (1) 344 + * # 0x75, 0x01, // Report Size (1) 346 + * # 0x95, 0x08, // Report Count (8) 348 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 350 + * # 0x05, 0x07, // Usage Page (Keyboard/Keypad) 352 + * # 0x19, 0x00, // UsageMinimum (0) 354 + * # 0x29, 0xff, // UsageMaximum (255) 356 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 358 + * # 0x75, 0x08, // Report Size (8) 361 + * # 0x95, 0x06, // Report Count (6) 363 + * # ┇ 0x81, 0x00, // Input (Data,Arr,Abs) 365 + * # 0xc0, // End Collection 367 + * R: 368 05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 06 a1 01 85 03 05 07 19 e0 29 e7 15 00 25 01 75 01 95 08 81 02 05 07 19 00 29 ff 26 ff 00 75 08 95 06 81 00 c0 + * N: HUION Huion Tablet_GS1333 + * I: 3 256c 2008 + * + * DESCRIPTOR 2 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 0 + * # 0x09, 0x0e, // Usage (System Multi-Axis Controller) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # ┅ 0x85, 0x11, // Report ID (17) 6 + * # 0x05, 0x0d, // Usage Page (Digitizers) 8 + * # 0x09, 0x21, // Usage (Puck) 10 + * # 0xa1, 0x02, // Collection (Logical) 12 + * # 0x15, 0x00, // Logical Minimum (0) 14 + * # 0x25, 0x01, // Logical Maximum (1) 16 + * # 0x75, 0x01, // Report Size (1) 18 + * # 0x95, 0x01, // Report Count (1) 20 + * # 0xa1, 0x00, // Collection (Physical) 22 + * # 0x05, 0x09, // Usage Page (Button) 24 + * # 0x09, 0x01, // Usage (Button 1) 26 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 28 + * # 0x05, 0x0d, // Usage Page (Digitizers) 30 + * # 0x09, 0x33, // Usage (Touch) 32 + * # ┇ 0x81, 0x02, // Input (Data,Var,Abs) 34 + * # 0x95, 0x06, // Report Count (6) 36 + * # ┇ 0x81, 0x03, // Input (Cnst,Var,Abs) 38 + * # 0xa1, 0x02, // Collection (Logical) 40 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 42 + * # 0x09, 0x37, // Usage (Dial) 44 + * # 0x16, 0x00, 0x80, // Logical Minimum (-32768) 46 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 49 + * # 0x75, 0x10, // Report Size (16) 52 + * # 0x95, 0x01, // Report Count (1) 54 + * # ┇ 0x81, 0x06, // Input (Data,Var,Rel) 56 + * # 0x35, 0x00, // Physical Minimum (0) 58 + * # 0x46, 0x10, 0x0e, // Physical Maximum (3600) 60 + * # 0x15, 0x00, // Logical Minimum (0) 63 + * # 0x26, 0x10, 0x0e, // Logical Maximum (3600) 65 + * # 0x09, 0x48, // Usage (Resolution Multiplier) 68 + * # ║ 0xb1, 0x02, // Feature (Data,Var,Abs) 70 + * # 0x45, 0x00, // Physical Maximum (0) 72 + * # 0xc0, // End Collection 74 + * # 0x75, 0x08, // Report Size (8) 75 + * # 0x95, 0x01, // Report Count (1) 77 + * # ┇ 0x81, 0x01, // Input (Cnst,Arr,Abs) 79 + * # 0x75, 0x08, // Report Size (8) 81 + * # 0x95, 0x01, // Report Count (1) 83 + * # ┇ 0x81, 0x01, // Input (Cnst,Arr,Abs) 85 + * # 0x75, 0x08, // Report Size (8) 87 + * # 0x95, 0x01, // Report Count (1) 89 + * # ┇ 0x81, 0x01, // Input (Cnst,Arr,Abs) 91 + * # 0x75, 0x08, // Report Size (8) 93 + * # 0x95, 0x01, // Report Count (1) 95 + * # ┇ 0x81, 0x01, // Input (Cnst,Arr,Abs) 97 + * # 0x75, 0x08, // Report Size (8) 99 + * # 0x95, 0x01, // Report Count (1) 101 + * # ┇ 0x81, 0x01, // Input (Cnst,Arr,Abs) 103 + * # 0xc0, // End Collection 105 + * # 0xc0, // End Collection 106 + * # 0xc0, // End Collection 107 + * R: 108 05 01 09 0e a1 01 85 11 05 0d 09 21 a1 02 15 00 25 01 75 01 95 01 a1 00 05 09 09 01 81 02 05 0d 09 33 81 02 95 06 81 03 a1 02 05 01 09 37 16 00 80 26 ff 7f 75 10 95 01 81 06 35 00 46 10 0e 15 00 26 10 0e 09 48 b1 02 45 00 c0 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 c0 c0 c0 + * N: HUION Huion Tablet_GS1333 + * I: 3 256c 2008 + * + * + * + * + * + * + * + * + * VENDOR MODE + * HUION_FIRMWARE_ID="HUION_M22c_240606" + * HUION_MAGIC_BYTES="140388e500108100ff3fd8130307008008004010" + * + * MAGIC BYTES + * [LogicalMaximum, X] [LogicalMaximum, Y] [LogicalMaximum, Pressure] [ LPI] + * 14 03 [ 88 e5] 00 [ 10 81] 00 [ ff 3f] [d8 13] 03 07 00 80 08 00 40 10 + * + * + * HIDRAW 0 + * DESCRIPTIONS + * report_subtype = (data[1] >> 4) & 0x0f + * + * REPORT SUBTYPES + * 0x0e Buttons + * (data[4] & 0x01) button 1 + * (data[4] & 0x02) button 2 + * (data[4] & 0x04) button 3 + * (data[4] & 0x08) button 4 + * (data[4] & 0x10) button 5 + * (data[4] & 0x20) button 6 (top wheel button) + * (data[4] & 0x40) button 7 (bottom wheel button) + * + * All tablet buttons release with the same report: + * 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 + * + * Despite data[4] looking like a bit field, only one button + * can be unambiguously tracked at a time. + * (See NOTES ON SIMULTANEOUS BUTTON HOLDS at the end of this + * comment for examples of the confusion this can create.) + * + * All buttons, with the exceptions of 6 and 7, will repeatedly + * report a press event approximately every 225ms while held. + * + * 0x0f Wheels + * data[3] == 1: top wheel + * data[3] == 2: bottom wheel + * data[5] == 1: clockwise + * data[5] == 2: counter-clockwise + * + * 0x08/0x00 Pen + * report_subtype == 0x08: in-range + * report_subtype == 0x00: out-of-range + * For clarity, this is also equivalent to: + * (data[1] & 0x80) in-range + * + * Switches + * (data[1] & 0x01) tip switch + * (data[1] & 0x02) barrel switch + * (data[1] & 0x04) secondary barrel switch + * (data[1] & 0x08) third barrel switch + * + * Unfortunately, I don't have a pen with an eraser, so I can't + * confirm where the invert and eraser bits reside. + * If we guess using the definitions from HID descriptor 1, + * then they might be... + * (data[1] & 0x08) invert (conflicts with third barrel switch) + * (data[1] & 0x10) eraser + * + * data[2], data[3] X (little-endian, maximum 0xe588) + * + * data[4], data[5] Y (little-endian, maximum 0x8110) + * + * data[6], data[7] Pressure (little-endian, maximum 0x3fff) + * + * data[10] X tilt (signed, -60 to +60) + * data[11] Y tilt (signed, -60 to +60, inverted) + * + * + * EXAMPLE REPORTS + * Top wheel button, press, hold, then release + * E: 000000.000040 14 08 e0 01 01 20 00 00 00 00 00 00 00 00 00 + * E: 000001.531559 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 + * + * Bottom wheel button, press, hold, then release + * E: 000002.787603 14 08 e0 01 01 40 00 00 00 00 00 00 00 00 00 + * E: 000004.215609 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 + * + * + * Top wheel rotation, one detent CW + * E: 000194.003899 14 08 f1 01 01 00 01 00 00 00 00 00 00 00 00 + * + * Top wheel rotation, one detent CCW + * E: 000194.997812 14 08 f1 01 01 00 02 00 00 00 00 00 00 00 00 + * + * Bottom wheel rotation, one detent CW + * E: 000196.693840 14 08 f1 01 02 00 01 00 00 00 00 00 00 00 00 + * + * Bottom wheel rotation, one detent CCW + * E: 000197.757895 14 08 f1 01 02 00 02 00 00 00 00 00 00 00 00 + * + * + * Button 1, press, hold, then release + * E: 000000.000149 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 < press + * E: 000000.447598 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 < starting to auto-repeat, every ~225ms + * E: 000000.673586 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 + * E: 000000.900582 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 + * E: 000001.126703 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 + * E: 000001.347706 14 08 e0 01 01 01 00 00 00 00 00 00 00 00 00 + * E: 000001.533721 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 < release + * + * Button 2, press, hold, then release + * E: 000003.304735 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 < press + * E: 000003.746743 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 < starting to auto-repeat, every ~225ms + * E: 000003.973741 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 + * E: 000004.199832 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 + * E: 000004.426732 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 + * E: 000004.647738 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 + * E: 000004.874733 14 08 e0 01 01 02 00 00 00 00 00 00 00 00 00 + * E: 000004.930713 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 < release + * + * Button 3, press, hold, then release + * E: 000006.650346 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 < press + * E: 000007.051782 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 < starting to auto-repeat, every ~225ms + * E: 000007.273738 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 + * E: 000007.499794 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 + * E: 000007.726725 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 + * E: 000007.947765 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 + * E: 000008.174755 14 08 e0 01 01 04 00 00 00 00 00 00 00 00 00 + * E: 000008.328786 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 < release + * + * Button 4, press, hold, then release + * E: 000009.893820 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 < press + * E: 000010.274781 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 < starting to auto-repeat, every ~225ms + * E: 000010.500931 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 + * E: 000010.722777 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 + * E: 000010.948778 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 + * E: 000011.175799 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 + * E: 000011.401153 14 08 e0 01 01 08 00 00 00 00 00 00 00 00 00 + * E: 000011.432114 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 < release + * + * Button 5, press, hold, then release + * E: 000013.007778 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 < press + * E: 000013.424741 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 < starting to auto-repeat, every ~225ms + * E: 000013.651715 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 + * E: 000013.872763 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 + * E: 000014.099789 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 + * E: 000014.325734 14 08 e0 01 01 10 00 00 00 00 00 00 00 00 00 + * E: 000014.438080 14 08 e0 01 01 00 00 00 00 00 00 00 00 00 00 < release + * + * + * Pen: Top-left, then out of range + * E: 000368.572184 14 08 80 00 00 00 00 00 00 00 00 fb ed 03 00 + * E: 000368.573030 14 08 00 00 00 00 00 00 00 00 00 fb ed 03 00 + * + * Pen: Bottom-right, then out of range + * E: 000544.433185 14 08 80 88 e5 10 81 00 00 00 00 00 00 03 00 + * E: 000544.434183 14 08 00 88 e5 10 81 00 00 00 00 00 00 03 00 + * + * Pen: Max Y tilt (tip of pen points down) + * E: 000002.231927 14 08 80 f5 5d 6c 36 00 00 00 00 09 3c 03 00 + * + * Pen: Min Y Tilt (tip of pen points up) + * E: 000657.593338 14 08 80 5f 69 fa 2c 00 00 00 00 fe c4 03 00 + * + * Pen: Max X tilt (tip of pen points left) + * E: 000742.246503 14 08 80 2a 4f c4 38 00 00 00 00 3c ed 03 00 + * + * Pen: Min X Tilt (tip of pen points right) + * E: 000776.404446 14 08 00 18 85 7c 3b 00 00 00 00 c4 ed 03 00 + * + * Pen: Tip switch, max pressure, then low pressure + * E: 001138.935675 14 08 81 d2 66 04 40 ff 3f 00 00 00 08 03 00 + * + * E: 001142.403715 14 08 81 9d 69 47 3e 82 04 00 00 00 07 03 00 + * + * Pen: Barrel switch + * E: 001210.645652 14 08 82 0d 72 ea 2b 00 00 00 00 db c4 03 00 + * + * Pen: Secondary barrel switch + * E: 001211.519729 14 08 84 2c 71 51 2b 00 00 00 00 da c4 03 00 + * + * Pen: Third switch + * E: 001212.443722 14 08 88 1d 72 df 2b 00 00 00 00 dc c4 03 00 + * + * + * HIDRAW 1 + * No reports + * + * + * HIDRAW 2 + * No reports + * + * + * + * + * + * + * + * + * FIRMWARE MODE + * HIDRAW 0 + * No reports + * + * + * HIDRAW 1 + * EXAMPLE REPORTS + * Top wheel button, *release* + * E: 000067.043739 8 03 00 00 00 00 00 00 00 + * + * Bottom wheel button, *release* + * E: 000068.219161 8 03 00 00 00 00 00 00 00 + * + * + * Button 1, press, then release + * E: 000163.767870 8 03 00 05 00 00 00 00 00 + * E: 000165.969193 8 03 00 00 00 00 00 00 00 + * + * Button 2, press, then release + * E: 000261.728935 8 03 05 11 00 00 00 00 00 + * E: 000262.956220 8 03 00 00 00 00 00 00 00 + * + * Button 3, press, then release + * E: 000289.127881 8 03 01 16 00 00 00 00 00 + * E: 000290.014594 8 03 00 00 00 00 00 00 00 + * + * Button 4, press, then release + * E: 000303.025839 8 03 00 2c 00 00 00 00 00 + * E: 000303.994479 8 03 00 00 00 00 00 00 00 + * + * Button 5, press, then release + * E: 000315.500835 8 03 05 1d 00 00 00 00 00 + * E: 000316.603274 8 03 00 00 00 00 00 00 00 + * + * BUTTON SUMMARY + * 1 E: 000163.767870 8 03 00 05 00 00 00 00 00 Keyboard: B + * 2 E: 000261.728935 8 03 05 11 00 00 00 00 00 Keyboard: LCtrl+LAlt N + * 3 E: 000289.127881 8 03 01 16 00 00 00 00 00 Keyboard: LCtrl S + * 4 E: 000303.025839 8 03 00 2c 00 00 00 00 00 Keyboard: Space + * 5 E: 000315.500835 8 03 05 1d 00 00 00 00 00 Keyboard: LCtrl+LAlt + * + * All buttons (including the wheel buttons) release the same way: + * 03 00 00 00 00 00 00 00 + * + * + * Pen: Top-left, then out of range + * E: 000063.196828 10 0a c0 00 00 00 00 00 00 00 02 + * E: 000063.197762 10 0a 00 00 00 00 00 00 00 00 02 + * + * Pen: Bottom-right, then out of range + * E: 000197.123138 10 0a c0 ff 7f ff 7f 00 00 00 00 + * E: 000197.124915 10 0a 00 ff 7f ff 7f 00 00 00 00 + * + * Pen: Max Y Tilt (tip of pen points up) + * E: 000291.399541 10 0a c0 19 32 0b 58 00 00 00 3c + * + * Pen: Min Y tilt (tip of pen points down) + * E: 000340.888288 10 0a c0 85 40 89 6e 00 00 17 c4 + * + * Pen: Max X tilt (tip of pen points left) + * E: 000165.575115 10 0a c0 a7 34 99 42 00 00 3c f4 + * + * Pen: Min X Tilt (tip of pen points right) + * E: 000129.507883 10 0a c0 ea 4b 08 40 00 00 c4 1a + * + * Pen: Tip switch, max pressure, then low pressure + * E: 000242.077160 10 0a c1 7e 3c 12 31 ff 3f 03 fd + * + * E: 000339.139188 10 0a c1 ee 3a 9e 32 b5 00 06 f6 + * + * Pen: Barrel switch + * E: 000037.949777 10 0a c2 5c 28 47 2a 00 00 f6 3c + * + * Pen: Secondary barrel switch + * E: 000038.320840 10 0a c4 e4 27 fd 29 00 00 f3 38 + * + * Pen: Third switch + * E: 000038.923822 10 0a c8 97 27 5f 29 00 00 f2 33 + * + * + * HIDRAW 2 + * EXAMPLE REPORTS + * Either wheel rotation, one detent CW + * E: 000097.276573 9 11 00 01 00 00 00 00 00 00 + * + * Either wheel rotation, one detent CCW + * E: 000153.416538 9 11 00 ff ff 00 00 00 00 00 + * + * Either wheel rotation, increasing rotation speed CW + * (Note that the wheels on my particular tablet may be + * damaged, so the false rotation direction changes + * that can be observed might not happen on other units.) + * E: 000210.514925 9 11 00 01 00 00 00 00 00 00 + * E: 000210.725718 9 11 00 01 00 00 00 00 00 00 + * E: 000210.924009 9 11 00 01 00 00 00 00 00 00 + * E: 000211.205629 9 11 00 01 00 00 00 00 00 00 + * E: 000211.280521 9 11 00 0b 00 00 00 00 00 00 + * E: 000211.340121 9 11 00 0e 00 00 00 00 00 00 + * E: 000211.404018 9 11 00 0d 00 00 00 00 00 00 + * E: 000211.462060 9 11 00 0e 00 00 00 00 00 00 + * E: 000211.544886 9 11 00 0a 00 00 00 00 00 00 + * E: 000211.606130 9 11 00 0d 00 00 00 00 00 00 + * E: 000211.674560 9 11 00 0c 00 00 00 00 00 00 + * E: 000211.712039 9 11 00 16 00 00 00 00 00 00 + * E: 000211.748076 9 11 00 17 00 00 00 00 00 00 + * E: 000211.786016 9 11 00 17 00 00 00 00 00 00 + * E: 000211.832960 9 11 00 11 00 00 00 00 00 00 + * E: 000211.874081 9 11 00 14 00 00 00 00 00 00 + * E: 000211.925094 9 11 00 10 00 00 00 00 00 00 + * E: 000211.959048 9 11 00 18 00 00 00 00 00 00 + * E: 000212.006937 9 11 00 11 00 00 00 00 00 00 + * E: 000212.050055 9 11 00 13 00 00 00 00 00 00 + * E: 000212.091947 9 11 00 14 00 00 00 00 00 00 + * E: 000212.122989 9 11 00 1a 00 00 00 00 00 00 + * E: 000212.160866 9 11 00 16 00 00 00 00 00 00 + * E: 000212.194002 9 11 00 19 00 00 00 00 00 00 + * E: 000212.242249 9 11 00 11 00 00 00 00 00 00 + * E: 000212.278061 9 11 00 18 00 00 00 00 00 00 + * E: 000212.328899 9 11 00 10 00 00 00 00 00 00 + * E: 000212.354005 9 11 00 22 00 00 00 00 00 00 + * E: 000212.398995 9 11 00 12 00 00 00 00 00 00 + * E: 000212.432050 9 11 00 19 00 00 00 00 00 00 + * E: 000212.471164 9 11 00 16 00 00 00 00 00 00 + * E: 000212.507047 9 11 00 17 00 00 00 00 00 00 + * E: 000212.540964 9 11 00 19 00 00 00 00 00 00 + * E: 000212.567942 9 11 00 1f 00 00 00 00 00 00 + * E: 000212.610007 9 11 00 14 00 00 00 00 00 00 + * E: 000212.641101 9 11 00 1b 00 00 00 00 00 00 + * E: 000212.674113 9 11 00 19 00 00 00 00 00 00 + * E: 000212.674909 9 11 00 01 00 00 00 00 00 00 + * E: 000212.677062 9 11 00 00 02 00 00 00 00 00 + * E: 000212.679048 9 11 00 55 01 00 00 00 00 00 + * E: 000212.682166 9 11 00 55 01 00 00 00 00 00 + * E: 000212.682788 9 11 00 ff ff 00 00 00 00 00 + * E: 000212.683899 9 11 00 01 00 00 00 00 00 00 + * E: 000212.685827 9 11 00 67 fe 00 00 00 00 00 + * E: 000212.686941 9 11 00 00 08 00 00 00 00 00 + * E: 000212.727840 9 11 00 14 00 00 00 00 00 00 + * E: 000212.772884 9 11 00 13 00 00 00 00 00 00 + * E: 000212.810975 9 11 00 16 00 00 00 00 00 00 + * E: 000212.811793 9 11 00 00 08 00 00 00 00 00 + * E: 000212.812683 9 11 00 01 00 00 00 00 00 00 + * E: 000212.813905 9 11 00 01 00 00 00 00 00 00 + * E: 000212.814909 9 11 00 00 04 00 00 00 00 00 + * E: 000212.816942 9 11 00 01 00 00 00 00 00 00 + * E: 000212.817851 9 11 00 ff ff 00 00 00 00 00 + * E: 000212.818752 9 11 00 01 00 00 00 00 00 00 + * E: 000212.819910 9 11 00 56 fd 00 00 00 00 00 + * E: 000212.820781 9 11 00 ff ff 00 00 00 00 00 + * E: 000212.821811 9 11 00 00 04 00 00 00 00 00 + * E: 000212.822920 9 11 00 00 08 00 00 00 00 00 + * E: 000212.823861 9 11 00 00 02 00 00 00 00 00 + * E: 000212.828781 9 11 00 ba 00 00 00 00 00 00 + * E: 000212.874097 9 11 00 12 00 00 00 00 00 00 + * E: 000212.874872 9 11 00 00 fc 00 00 00 00 00 + * E: 000212.876136 9 11 00 00 fc 00 00 00 00 00 + * E: 000212.877036 9 11 00 00 f8 00 00 00 00 00 + * E: 000212.877993 9 11 00 00 f8 00 00 00 00 00 + * E: 000212.879748 9 11 00 01 00 00 00 00 00 00 + * E: 000212.880728 9 11 00 01 00 00 00 00 00 00 + * E: 000212.881956 9 11 00 00 04 00 00 00 00 00 + * E: 000212.885065 9 11 00 ff ff 00 00 00 00 00 + * E: 000212.917060 9 11 00 1a 00 00 00 00 00 00 + * E: 000212.936458 9 11 00 2d 00 00 00 00 00 00 + * E: 000212.957860 9 11 00 25 00 00 00 00 00 00 + * E: 000212.984019 9 11 00 20 00 00 00 00 00 00 + * E: 000213.017915 9 11 00 19 00 00 00 00 00 00 + * E: 000213.039973 9 11 00 27 00 00 00 00 00 00 + * E: 000213.065933 9 11 00 21 00 00 00 00 00 00 + * E: 000213.085807 9 11 00 28 00 00 00 00 00 00 + * E: 000213.108888 9 11 00 25 00 00 00 00 00 00 + * E: 000213.129726 9 11 00 29 00 00 00 00 00 00 + * E: 000213.172043 9 11 00 14 00 00 00 00 00 00 + * E: 000213.195873 9 11 00 23 00 00 00 00 00 00 + * E: 000213.222884 9 11 00 20 00 00 00 00 00 00 + * E: 000213.243220 9 11 00 2a 00 00 00 00 00 00 + * E: 000213.266778 9 11 00 24 00 00 00 00 00 00 + * E: 000213.285951 9 11 00 2b 00 00 00 00 00 00 + * E: 000213.306045 9 11 00 2a 00 00 00 00 00 00 + * E: 000213.306796 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.307755 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.308820 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.309971 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.310980 9 11 00 01 00 00 00 00 00 00 + * E: 000213.311853 9 11 00 01 00 00 00 00 00 00 + * E: 000213.312861 9 11 00 aa 02 00 00 00 00 00 + * E: 000213.313884 9 11 00 00 f8 00 00 00 00 00 + * E: 000213.315111 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.315992 9 11 00 01 00 00 00 00 00 00 + * E: 000213.316955 9 11 00 00 08 00 00 00 00 00 + * E: 000213.346065 9 11 00 1d 00 00 00 00 00 00 + * E: 000213.346963 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.347874 9 11 00 00 08 00 00 00 00 00 + * E: 000213.348736 9 11 00 00 08 00 00 00 00 00 + * E: 000213.349795 9 11 00 00 04 00 00 00 00 00 + * E: 000213.350791 9 11 00 01 00 00 00 00 00 00 + * E: 000213.351791 9 11 00 01 00 00 00 00 00 00 + * E: 000213.352729 9 11 00 00 f8 00 00 00 00 00 + * E: 000213.353811 9 11 00 01 00 00 00 00 00 00 + * E: 000213.354755 9 11 00 00 f8 00 00 00 00 00 + * E: 000213.355795 9 11 00 00 f8 00 00 00 00 00 + * E: 000213.356813 9 11 00 01 00 00 00 00 00 00 + * E: 000213.357817 9 11 00 00 04 00 00 00 00 00 + * E: 000213.393838 9 11 00 17 00 00 00 00 00 00 + * E: 000213.394719 9 11 00 00 04 00 00 00 00 00 + * E: 000213.395682 9 11 00 00 08 00 00 00 00 00 + * E: 000213.396679 9 11 00 00 04 00 00 00 00 00 + * E: 000213.397651 9 11 00 00 fc 00 00 00 00 00 + * E: 000213.398661 9 11 00 ff ff 00 00 00 00 00 + * E: 000213.400308 9 11 00 56 fd 00 00 00 00 00 + * E: 000213.400909 9 11 00 00 f8 00 00 00 00 00 + * E: 000213.401837 9 11 00 01 00 00 00 00 00 00 + * + * Either wheel rotation, increasing rotation speed CCW + * (Note that the wheels on my particular tablet may be + * damaged, so the false rotation direction changes + * that can be observed might not happen on other units.) + * E: 000040.527820 9 11 00 ff ff 00 00 00 00 00 + * E: 000040.816644 9 11 00 ff ff 00 00 00 00 00 + * E: 000040.880423 9 11 00 f3 ff 00 00 00 00 00 + * E: 000040.882570 9 11 00 ff ff 00 00 00 00 00 + * E: 000040.883381 9 11 00 ff ff 00 00 00 00 00 + * E: 000040.885463 9 11 00 aa 02 00 00 00 00 00 + * E: 000040.924106 9 11 00 ea ff 00 00 00 00 00 + * E: 000041.006155 9 11 00 f6 ff 00 00 00 00 00 + * E: 000041.085799 9 11 00 f6 ff 00 00 00 00 00 + * E: 000041.168492 9 11 00 f6 ff 00 00 00 00 00 + * E: 000041.233453 9 11 00 f3 ff 00 00 00 00 00 + * E: 000041.296641 9 11 00 f3 ff 00 00 00 00 00 + * E: 000041.370302 9 11 00 f5 ff 00 00 00 00 00 + * E: 000041.437410 9 11 00 f4 ff 00 00 00 00 00 + * E: 000041.474514 9 11 00 e9 ff 00 00 00 00 00 + * E: 000041.522171 9 11 00 ef ff 00 00 00 00 00 + * E: 000041.568160 9 11 00 ee ff 00 00 00 00 00 + * E: 000041.608146 9 11 00 ec ff 00 00 00 00 00 + * E: 000041.627132 9 11 00 d3 ff 00 00 00 00 00 + * E: 000041.656151 9 11 00 e3 ff 00 00 00 00 00 + * E: 000041.682264 9 11 00 e0 ff 00 00 00 00 00 + * E: 000041.714186 9 11 00 e6 ff 00 00 00 00 00 + * E: 000041.740339 9 11 00 e0 ff 00 00 00 00 00 + * E: 000041.772087 9 11 00 e5 ff 00 00 00 00 00 + * E: 000041.801093 9 11 00 e3 ff 00 00 00 00 00 + * E: 000041.834051 9 11 00 e7 ff 00 00 00 00 00 + * E: 000041.863094 9 11 00 e3 ff 00 00 00 00 00 + * E: 000041.901016 9 11 00 ea ff 00 00 00 00 00 + * E: 000041.901956 9 11 00 00 04 00 00 00 00 00 + * E: 000041.902837 9 11 00 00 fe 00 00 00 00 00 + * E: 000041.903927 9 11 00 01 00 00 00 00 00 00 + * E: 000041.905066 9 11 00 01 00 00 00 00 00 00 + * E: 000041.907214 9 11 00 00 fe 00 00 00 00 00 + * E: 000041.909011 9 11 00 01 00 00 00 00 00 00 + * E: 000041.909953 9 11 00 01 00 00 00 00 00 00 + * E: 000041.910917 9 11 00 00 08 00 00 00 00 00 + * E: 000041.913280 9 11 00 00 fe 00 00 00 00 00 + * E: 000041.914121 9 11 00 56 fd 00 00 00 00 00 + * E: 000041.915346 9 11 00 ff ff 00 00 00 00 00 + * E: 000041.962101 9 11 00 ee ff 00 00 00 00 00 + * E: 000041.964062 9 11 00 56 fd 00 00 00 00 00 + * E: 000041.964978 9 11 00 00 fc 00 00 00 00 00 + * E: 000041.968058 9 11 00 24 01 00 00 00 00 00 + * E: 000041.968880 9 11 00 56 fd 00 00 00 00 00 + * E: 000041.970977 9 11 00 aa 02 00 00 00 00 00 + * E: 000041.971932 9 11 00 ff ff 00 00 00 00 00 + * E: 000041.972943 9 11 00 01 00 00 00 00 00 00 + * E: 000041.975291 9 11 00 ff ff 00 00 00 00 00 + * E: 000041.978274 9 11 00 01 00 00 00 00 00 00 + * E: 000042.035079 9 11 00 01 00 00 00 00 00 00 + * E: 000042.041283 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.042057 9 11 00 00 04 00 00 00 00 00 + * E: 000042.045169 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.051242 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.056099 9 11 00 63 ff 00 00 00 00 00 + * E: 000042.106329 9 11 00 ef ff 00 00 00 00 00 + * E: 000042.108601 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.116259 9 11 00 6b 00 00 00 00 00 00 + * E: 000042.119140 9 11 00 55 01 00 00 00 00 00 + * E: 000042.126101 9 11 00 88 ff 00 00 00 00 00 + * E: 000042.158009 9 11 00 e6 ff 00 00 00 00 00 + * E: 000042.172108 9 11 00 be ff 00 00 00 00 00 + * E: 000042.207417 9 11 00 e8 ff 00 00 00 00 00 + * E: 000042.223155 9 11 00 cc ff 00 00 00 00 00 + * E: 000042.255185 9 11 00 e6 ff 00 00 00 00 00 + * E: 000042.276280 9 11 00 d7 ff 00 00 00 00 00 + * E: 000042.302128 9 11 00 e0 ff 00 00 00 00 00 + * E: 000042.317423 9 11 00 c8 ff 00 00 00 00 00 + * E: 000042.345226 9 11 00 e1 ff 00 00 00 00 00 + * E: 000042.357243 9 11 00 bc ff 00 00 00 00 00 + * E: 000042.381308 9 11 00 dc ff 00 00 00 00 00 + * E: 000042.383180 9 11 00 dc fe 00 00 00 00 00 + * E: 000042.412288 9 11 00 e3 ff 00 00 00 00 00 + * E: 000042.451216 9 11 00 eb ff 00 00 00 00 00 + * E: 000042.478372 9 11 00 e0 ff 00 00 00 00 00 + * E: 000042.502116 9 11 00 dd ff 00 00 00 00 00 + * E: 000042.520105 9 11 00 d3 ff 00 00 00 00 00 + * E: 000042.540345 9 11 00 d6 ff 00 00 00 00 00 + * E: 000042.541021 9 11 00 00 08 00 00 00 00 00 + * E: 000042.542009 9 11 00 01 00 00 00 00 00 00 + * E: 000042.543045 9 11 00 00 04 00 00 00 00 00 + * E: 000042.544279 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.545097 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.546074 9 11 00 00 08 00 00 00 00 00 + * E: 000042.547237 9 11 00 00 08 00 00 00 00 00 + * E: 000042.548029 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.549304 9 11 00 00 f8 00 00 00 00 00 + * E: 000042.553123 9 11 00 00 ff 00 00 00 00 00 + * E: 000042.581186 9 11 00 e1 ff 00 00 00 00 00 + * E: 000042.582238 9 11 00 00 f8 00 00 00 00 00 + * E: 000042.583150 9 11 00 00 fc 00 00 00 00 00 + * E: 000042.584273 9 11 00 00 f8 00 00 00 00 00 + * E: 000042.585019 9 11 00 00 fc 00 00 00 00 00 + * E: 000042.586059 9 11 00 01 00 00 00 00 00 00 + * E: 000042.589012 9 11 00 67 fe 00 00 00 00 00 + * E: 000042.590066 9 11 00 00 fc 00 00 00 00 00 + * E: 000042.592916 9 11 00 dc fe 00 00 00 00 00 + * E: 000042.621124 9 11 00 e1 ff 00 00 00 00 00 + * E: 000042.622092 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.623069 9 11 00 01 00 00 00 00 00 00 + * E: 000042.624030 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.625006 9 11 00 00 08 00 00 00 00 00 + * E: 000042.626068 9 11 00 00 04 00 00 00 00 00 + * E: 000042.626876 9 11 00 00 08 00 00 00 00 00 + * E: 000042.628392 9 11 00 00 08 00 00 00 00 00 + * E: 000042.628918 9 11 00 01 00 00 00 00 00 00 + * E: 000042.630009 9 11 00 ff ff 00 00 00 00 00 + * E: 000042.631934 9 11 00 00 fe 00 00 00 00 00 + * E: 000042.656285 9 11 00 dd ff 00 00 00 00 00 + * E: 000042.659870 9 11 00 cc 00 00 00 00 00 00 + * E: 000042.666128 9 11 00 9d 00 00 00 00 00 00 + * E: 000042.672458 9 11 00 80 ff 00 00 00 00 00 + * E: 000042.696106 9 11 00 dc ff 00 00 00 00 00 + * E: 000042.705129 9 11 00 61 00 00 00 00 00 00 + * E: 000042.731303 9 11 00 e0 ff 00 00 00 00 00 + * E: 000042.741278 9 11 00 ab ff 00 00 00 00 00 + * E: 000042.788181 9 11 00 ee ff 00 00 00 00 00 + * E: 000042.810441 9 11 00 db ff 00 00 00 00 00 + * E: 000042.838073 9 11 00 e1 ff 00 00 00 00 00 + * E: 000042.852235 9 11 00 c4 ff 00 00 00 00 00 + * E: 000042.882290 9 11 00 e4 ff 00 00 00 00 00 + * + * Either wheel button, press, hold, then release + * E: 000202.084982 9 11 02 00 00 00 00 00 00 00 + * E: 000202.090172 9 11 03 00 00 00 00 00 00 00 + * E: 000202.094139 9 11 03 00 00 00 00 00 00 00 + * E: 000202.099172 9 11 03 00 00 00 00 00 00 00 + * E: 000202.105055 9 11 03 00 00 00 00 00 00 00 + * E: 000202.109132 9 11 03 00 00 00 00 00 00 00 + * E: 000202.114185 9 11 03 00 00 00 00 00 00 00 + * E: 000202.119212 9 11 03 00 00 00 00 00 00 00 + * E: 000202.124264 9 11 03 00 00 00 00 00 00 00 + * E: 000202.130147 9 11 03 00 00 00 00 00 00 00 + * E: 000202.135138 9 11 03 00 00 00 00 00 00 00 + * E: 000202.140072 9 11 03 00 00 00 00 00 00 00 + * E: 000202.145146 9 11 03 00 00 00 00 00 00 00 + * E: 000202.150157 9 11 03 00 00 00 00 00 00 00 + * E: 000202.155339 9 11 03 00 00 00 00 00 00 00 + * E: 000202.160064 9 11 03 00 00 00 00 00 00 00 + * E: 000202.165026 9 11 03 00 00 00 00 00 00 00 + * E: 000202.170037 9 11 03 00 00 00 00 00 00 00 + * E: 000202.175154 9 11 03 00 00 00 00 00 00 00 + * E: 000202.180044 9 11 03 00 00 00 00 00 00 00 + * E: 000202.186280 9 11 03 00 00 00 00 00 00 00 + * E: 000202.191281 9 11 03 00 00 00 00 00 00 00 + * E: 000202.196106 9 11 03 00 00 00 00 00 00 00 + * E: 000202.201083 9 11 03 00 00 00 00 00 00 00 + * E: 000202.206166 9 11 03 00 00 00 00 00 00 00 + * E: 000202.211084 9 11 03 00 00 00 00 00 00 00 + * E: 000202.216175 9 11 03 00 00 00 00 00 00 00 + * E: 000202.221036 9 11 03 00 00 00 00 00 00 00 + * E: 000202.226271 9 11 03 00 00 00 00 00 00 00 + * E: 000202.231150 9 11 03 00 00 00 00 00 00 00 + * E: 000202.235924 9 11 03 00 00 00 00 00 00 00 + * E: 000202.242046 9 11 03 00 00 00 00 00 00 00 + * E: 000202.247164 9 11 03 00 00 00 00 00 00 00 + * E: 000202.252359 9 11 03 00 00 00 00 00 00 00 + * E: 000202.257295 9 11 03 00 00 00 00 00 00 00 + * E: 000202.262167 9 11 03 00 00 00 00 00 00 00 + * E: 000202.267081 9 11 03 00 00 00 00 00 00 00 + * E: 000202.272175 9 11 03 00 00 00 00 00 00 00 + * E: 000202.277085 9 11 03 00 00 00 00 00 00 00 + * E: 000202.282596 9 11 03 00 00 00 00 00 00 00 + * E: 000202.287078 9 11 03 00 00 00 00 00 00 00 + * E: 000202.292191 9 11 03 00 00 00 00 00 00 00 + * E: 000202.298196 9 11 03 00 00 00 00 00 00 00 + * E: 000202.303004 9 11 03 00 00 00 00 00 00 00 + * E: 000202.308113 9 11 03 00 00 00 00 00 00 00 + * E: 000202.313079 9 11 03 00 00 00 00 00 00 00 + * E: 000202.318243 9 11 03 00 00 00 00 00 00 00 + * E: 000202.323309 9 11 03 00 00 00 00 00 00 00 + * E: 000202.328190 9 11 03 00 00 00 00 00 00 00 + * E: 000202.333050 9 11 03 00 00 00 00 00 00 00 + * E: 000202.338162 9 11 03 00 00 00 00 00 00 00 + * E: 000202.343022 9 11 03 00 00 00 00 00 00 00 + * E: 000202.348113 9 11 03 00 00 00 00 00 00 00 + * E: 000202.354133 9 11 03 00 00 00 00 00 00 00 + * E: 000202.359132 9 11 03 00 00 00 00 00 00 00 + * E: 000202.364053 9 11 03 00 00 00 00 00 00 00 + * E: 000202.369034 9 11 03 00 00 00 00 00 00 00 + * E: 000202.374144 9 11 03 00 00 00 00 00 00 00 + * E: 000202.379027 9 11 03 00 00 00 00 00 00 00 + * E: 000202.384238 9 11 03 00 00 00 00 00 00 00 + * E: 000202.389249 9 11 03 00 00 00 00 00 00 00 + * E: 000202.394049 9 11 03 00 00 00 00 00 00 00 + * E: 000202.398949 9 11 03 00 00 00 00 00 00 00 + * E: 000202.404203 9 11 03 00 00 00 00 00 00 00 + * E: 000202.410098 9 11 03 00 00 00 00 00 00 00 + * E: 000202.415237 9 11 00 00 00 00 00 00 00 00 + * + * + * Top wheel button press and release while holding bottom wheel button + * (The reverse action (a bottom wheel button press while holding the top wheel button) is invisible.) + * E: 000071.126966 9 11 03 00 00 00 00 00 00 00 + * E: 000071.133117 9 11 03 00 00 00 00 00 00 00 + * E: 000071.137481 9 11 03 00 00 00 00 00 00 00 + * E: 000071.142036 9 11 03 00 00 00 00 00 00 00 + * E: 000071.147027 9 11 03 00 00 00 00 00 00 00 + * E: 000071.151988 9 11 03 00 00 00 00 00 00 00 + * E: 000071.157945 9 11 03 00 00 00 00 00 00 00 + * E: 000071.163657 9 11 03 00 00 00 00 00 00 00 + * E: 000071.168240 9 11 03 00 00 00 00 00 00 00 + * E: 000071.173109 9 11 02 00 00 00 00 00 00 00 < top wheel button press? + * E: 000071.178119 9 11 03 00 00 00 00 00 00 00 + * E: 000071.183046 9 11 03 00 00 00 00 00 00 00 + * E: 000071.187983 9 11 03 00 00 00 00 00 00 00 + * E: 000071.192996 9 11 03 00 00 00 00 00 00 00 + * E: 000071.198341 9 11 03 00 00 00 00 00 00 00 + * E: 000071.203122 9 11 03 00 00 00 00 00 00 00 + * E: 000071.208998 9 11 03 00 00 00 00 00 00 00 + * E: 000071.214037 9 11 03 00 00 00 00 00 00 00 + * E: 000071.218945 9 11 03 00 00 00 00 00 00 00 + * E: 000071.223835 9 11 03 00 00 00 00 00 00 00 + * E: 000071.228987 9 11 03 00 00 00 00 00 00 00 + * E: 000071.234082 9 11 03 00 00 00 00 00 00 00 + * E: 000071.239028 9 11 03 00 00 00 00 00 00 00 + * E: 000071.244307 9 11 00 00 00 00 00 00 00 00 < top wheel button release? + * E: 000071.245867 9 11 03 00 00 00 00 00 00 00 < continued hold of bottom button + * E: 000071.249959 9 11 03 00 00 00 00 00 00 00 + * E: 000071.255032 9 11 03 00 00 00 00 00 00 00 + * E: 000071.259972 9 11 03 00 00 00 00 00 00 00 + * E: 000071.265409 9 11 03 00 00 00 00 00 00 00 + * E: 000071.270156 9 11 03 00 00 00 00 00 00 00 + * E: 000071.275530 9 11 03 00 00 00 00 00 00 00 + * E: 000071.279975 9 11 03 00 00 00 00 00 00 00 + * E: 000071.285046 9 11 03 00 00 00 00 00 00 00 + * E: 000071.290906 9 11 03 00 00 00 00 00 00 00 + * E: 000071.296146 9 11 03 00 00 00 00 00 00 00 + * E: 000071.301288 9 11 03 00 00 00 00 00 00 00 + * + * Top wheel button hold while top wheel rotate CCW + * (I did not test the other combinations of this) + * E: 000022.253144 9 11 03 00 00 00 00 00 00 00 + * E: 000022.258157 9 11 03 00 00 00 00 00 00 00 + * E: 000022.262011 9 11 00 ff ff 00 00 00 00 00 + * E: 000022.264015 9 11 03 00 00 00 00 00 00 00 + * E: 000022.268976 9 11 03 00 00 00 00 00 00 00 + * + * + * + * + * + * + * + * + * NOTES ON SIMULTANEOUS BUTTON HOLDS + * (applies to vendor mode only) + * Value replacements for ease of reading: + * .7 = 0x40 (button 7, a wheel button) + * .1 = 0x01 (button 1, a pad button) + * rr = 0x00 (no buttons pressed) + * + * Press 7 + * Press 1 + * Release 7 + * Release 1 + * B: 000000.000152 42 08 e0 01 01 .7 00 00 00 00 00 00 00 00 00 + * B: 000000.781784 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000000.869845 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000001.095688 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000001.322635 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000001.543643 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000001.770652 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000001.885659 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 7 + * B: 000001.993620 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000002.220671 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000002.446589 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000002.672559 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000002.765183 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 1 + * + * Press 7 + * Press 1 + * Release 1 + * Release 7 + * B: 000017.071517 42 08 e0 01 01 .7 00 00 00 00 00 00 00 00 00 + * B: 000018.270461 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000018.419486 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000018.646438 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000018.872493 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000019.094422 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000019.320488 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000020.360505 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 1 is not reported until 7 is released, then both are rapidly reported + * B: 000020.361091 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 + * + * Press 1 + * Press 7 + * Release 7 + * Release 1 + * B: 000031.516315 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000031.922299 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000032.144165 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000032.370262 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000032.396242 42 08 e0 01 01 .7 00 00 00 00 00 00 00 00 00 + * B: 000032.597270 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000032.818187 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000033.045143 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000033.267535 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 7 + * B: 000033.272602 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000033.494246 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000033.721266 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000033.947237 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000034.169294 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000034.183585 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 1 + * + * Press 1 + * Press 7 + * Release 1 + * Release 7 + * B: 000056.628429 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000057.046348 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000057.272044 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000057.494434 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000057.601224 42 08 e0 01 01 .7 00 00 00 00 00 00 00 00 00 + * B: 000057.719262 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000057.946941 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000058.172346 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000058.393994 42 08 e0 01 01 .1 00 00 00 00 00 00 00 00 00 + * B: 000059.434576 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 release of 1 is not reported until 7 is released, then both are rapidly reported + * B: 000059.435857 42 08 e0 01 01 rr 00 00 00 00 00 00 00 00 00 + */ + + +/* Filled in by udev-hid-bpf */ +char UDEV_PROP_HUION_FIRMWARE_ID[64]; + +char EXPECTED_FIRMWARE_ID[] = "HUION_M22c_"; + +__u8 last_button_state; + +static const __u8 disabled_rdesc_tablet[] = { + FixedSizeVendorReport(28) /* Input report 4 */ +}; + +static const __u8 disabled_rdesc_wheel[] = { + FixedSizeVendorReport(9) /* Input report 17 */ +}; + +static const __u8 fixed_rdesc_vendor[] = { + UsagePage_Digitizers + Usage_Dig_Pen + CollectionApplication( + ReportId(VENDOR_REPORT_ID) + UsagePage_Digitizers + Usage_Dig_Pen + CollectionPhysical( + /* + * I have only examined the tablet's behavior while using + * the PW600L pen, which does not have an eraser. + * Because of this, I don't know where the Eraser and Invert + * bits will go, or if they work as one would expect. + * + * For the time being, there is no expectation that a pen + * with an eraser will work without modifications here. + */ + ReportSize(1) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + ReportCount(3) + Usage_Dig_TipSwitch + Usage_Dig_BarrelSwitch + Usage_Dig_SecondaryBarrelSwitch + Input(Var|Abs) + PushPop( + ReportCount(1) + UsagePage_Button + Usage_i8(0x4a) /* (BTN_STYLUS3 + 1) & 0xff */ + Input(Var|Abs) + ) + ReportCount(3) + Input(Const) + ReportCount(1) + Usage_Dig_InRange + Input(Var|Abs) + ReportSize(16) + ReportCount(1) + PushPop( + UsagePage_GenericDesktop + Unit(cm) + UnitExponent(-2) + LogicalMinimum_i16(0) + PhysicalMinimum_i16(0) + /* + * The tablet has a logical maximum of 58760 x 33040 + * and a claimed resolution of 5080 LPI (200 L/mm) + * This works out to a physical maximum of + * 293.8 x 165.2mm, which matches Huion's advertised + * active area dimensions from + * https://www.huion.com/products/pen_display/Kamvas/kamvas-13-gen-3.html + */ + LogicalMaximum_i16(58760) + PhysicalMaximum_i16(2938) + Usage_GD_X + Input(Var|Abs) + LogicalMaximum_i16(33040) + PhysicalMaximum_i16(1652) + Usage_GD_Y + Input(Var|Abs) + ) + LogicalMinimum_i16(0) + LogicalMaximum_i16(16383) + Usage_Dig_TipPressure + Input(Var|Abs) + ReportCount(1) + Input(Const) + ReportSize(8) + ReportCount(2) + PushPop( + Unit(deg) + UnitExponent(0) + LogicalMinimum_i8(-60) + PhysicalMinimum_i8(-60) + LogicalMaximum_i8(60) + PhysicalMaximum_i8(60) + Usage_Dig_XTilt + Usage_Dig_YTilt + Input(Var|Abs) + ) + ) + ) + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + ReportId(CUSTOM_PAD_REPORT_ID) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + /* + * The first 3 bytes are somewhat vestigial and will + * always be set to zero. Their presence here is needed + * to ensure that this device will be detected as a + * tablet pad by software that otherwise wouldn't know + * any better. + */ + /* (data[1] & 0x01) barrel switch */ + ReportSize(1) + ReportCount(1) + Usage_Dig_BarrelSwitch + Input(Var|Abs) + ReportCount(7) + Input(Const) + /* data[2] X */ + /* data[3] Y */ + ReportSize(8) + ReportCount(2) + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + Input(Var|Abs) + /* + * (data[4] & 0x01) button 1 + * (data[4] & 0x02) button 2 + * (data[4] & 0x04) button 3 + * (data[4] & 0x08) button 4 + * (data[4] & 0x10) button 5 + * (data[4] & 0x20) button 6 (top wheel button) + * (data[4] & 0x40) button 7 (bottom wheel button) + */ + ReportSize(1) + ReportCount(7) + UsagePage_Button + UsageMinimum_i8(1) + UsageMaximum_i8(7) + Input(Var|Abs) + ReportCount(1) + Input(Const) + /* data[5] top wheel (signed, positive clockwise) */ + ReportSize(8) + ReportCount(1) + UsagePage_GenericDesktop + Usage_GD_Wheel + LogicalMinimum_i8(-1) + LogicalMaximum_i8(1) + Input(Var|Rel) + /* data[6] bottom wheel (signed, positive clockwise) */ + UsagePage_Consumer + Usage_Con_ACPan + Input(Var|Rel) + ) + /* + * The kernel will drop reports that are bigger than the + * largest report specified in the HID descriptor. + * Therefore, our modified descriptor needs to have at least one + * HID report that is as long as, or longer than, the largest + * report in the original descriptor. + * + * This macro expands to a no-op report that is padded to the + * provided length. + */ + FixedSizeVendorReport(VENDOR_REPORT_LENGTH) + ) +}; + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(hid_fix_rdesc_huion_kamvas13_gen3, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, HID_MAX_DESCRIPTOR_SIZE /* size */); + __s32 rdesc_size = hid_ctx->size; + __u8 have_fw_id; + + if (!data) + return 0; /* EPERM check */ + + have_fw_id = __builtin_memcmp(UDEV_PROP_HUION_FIRMWARE_ID, + EXPECTED_FIRMWARE_ID, + sizeof(EXPECTED_FIRMWARE_ID) - 1) == 0; + + if (have_fw_id) { + /* + * Tablet should be in vendor mode. + * Disable the unused devices + */ + if (rdesc_size == TABLET_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, disabled_rdesc_tablet, + sizeof(disabled_rdesc_tablet)); + return sizeof(disabled_rdesc_tablet); + } + + if (rdesc_size == WHEEL_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, disabled_rdesc_wheel, + sizeof(disabled_rdesc_wheel)); + return sizeof(disabled_rdesc_wheel); + } + } + + /* + * Regardless of which mode the tablet is in, always fix the vendor + * descriptor in case the udev property just happened to not be set + */ + if (rdesc_size == VENDOR_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, fixed_rdesc_vendor, sizeof(fixed_rdesc_vendor)); + return sizeof(fixed_rdesc_vendor); + } + + return 0; +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(hid_fix_event_huion_kamvas13_gen3, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, VENDOR_REPORT_LENGTH /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* Handle vendor reports only */ + if (hid_ctx->size != VENDOR_REPORT_LENGTH) + return 0; + if (data[0] != VENDOR_REPORT_ID) + return 0; + + __u8 report_subtype = (data[1] >> 4) & 0x0f; + + if (report_subtype == VENDOR_REPORT_SUBTYPE_PEN || + report_subtype == VENDOR_REPORT_SUBTYPE_PEN_OUT) { + /* Invert Y tilt */ + data[11] = -data[11]; + + } else if (report_subtype == VENDOR_REPORT_SUBTYPE_BUTTONS || + report_subtype == VENDOR_REPORT_SUBTYPE_WHEELS) { + struct pad_report { + __u8 report_id; + __u8 btn_stylus:1; + __u8 padding:7; + __u8 x; + __u8 y; + __u8 buttons; + __s8 top_wheel; + __s8 bottom_wheel; + } __attribute__((packed)) *pad_report; + + __s8 top_wheel = 0; + __s8 bottom_wheel = 0; + + switch (report_subtype) { + case VENDOR_REPORT_SUBTYPE_WHEELS: + /* + * The wheel direction byte is 1 for clockwise rotation + * and 2 for counter-clockwise. + * Change it to 1 and -1, respectively. + */ + switch (data[3]) { + case 1: + top_wheel = (data[5] == 1) ? 1 : -1; + break; + case 2: + bottom_wheel = (data[5] == 1) ? 1 : -1; + break; + } + break; + + case VENDOR_REPORT_SUBTYPE_BUTTONS: + /* + * If a button is already being held, ignore any new + * button event unless it's a release. + * + * The tablet only cleanly handles one button being held + * at a time, and trying to hold multiple buttons + * (particularly wheel+pad buttons) can result in sequences + * of reports that look like imaginary presses and releases. + * + * This is an imperfect way to filter out some of these + * reports. + */ + if (last_button_state != 0x00 && data[4] != 0x00) + break; + + last_button_state = data[4]; + break; + } + + + pad_report = (struct pad_report *)data; + + pad_report->report_id = CUSTOM_PAD_REPORT_ID; + pad_report->btn_stylus = 0; + pad_report->x = 0; + pad_report->y = 0; + pad_report->buttons = last_button_state; + pad_report->top_wheel = top_wheel; + pad_report->bottom_wheel = bottom_wheel; + + return sizeof(struct pad_report); + } + + return 0; +} + +HID_BPF_OPS(huion_kamvas13_gen3) = { + .hid_device_event = (void *)hid_fix_event_huion_kamvas13_gen3, + .hid_rdesc_fixup = (void *)hid_fix_rdesc_huion_kamvas13_gen3, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + switch (ctx->rdesc_size) { + case VENDOR_DESCRIPTOR_LENGTH: + case TABLET_DESCRIPTOR_LENGTH: + case WHEEL_DESCRIPTOR_LENGTH: + ctx->retval = 0; + break; + default: + ctx->retval = -EINVAL; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 0412be1b8125204a15930128c2a1ed73b0f59f84 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:24 +0100 Subject: [PATCH 1068/1075] HID: bpf: support for Huion Kamvas 16 Gen 3 Another Huion Kamvas tablet support. Again it's safer to duplicate the code source in a separate file to ensure we are not breaking any existing device. Signed-off-by: Higgins Dragon Signed-off-by: Benjamin Tissoires Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/207 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../hid/bpf/progs/Huion__Kamvas16Gen3.bpf.c | 724 ++++++++++++++++++ 1 file changed, 724 insertions(+) create mode 100644 drivers/hid/bpf/progs/Huion__Kamvas16Gen3.bpf.c diff --git a/drivers/hid/bpf/progs/Huion__Kamvas16Gen3.bpf.c b/drivers/hid/bpf/progs/Huion__Kamvas16Gen3.bpf.c new file mode 100644 index 000000000000..ac66c6e65eb4 --- /dev/null +++ b/drivers/hid/bpf/progs/Huion__Kamvas16Gen3.bpf.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Nicholas LaPointe + * Copyright (c) 2025 Higgins Dragon + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include "hid_report_helpers.h" +#include + +#define VID_HUION 0x256c +#define PID_KAMVAS16_GEN3 0x2009 + +#define VENDOR_DESCRIPTOR_LENGTH 36 +#define TABLET_DESCRIPTOR_LENGTH 328 +#define WHEEL_DESCRIPTOR_LENGTH 200 + +#define VENDOR_REPORT_ID 8 +#define VENDOR_REPORT_LENGTH 14 + +#define VENDOR_REPORT_SUBTYPE_PEN 0x08 +#define VENDOR_REPORT_SUBTYPE_PEN_OUT 0x00 +#define VENDOR_REPORT_SUBTYPE_BUTTONS 0x0e +#define VENDOR_REPORT_SUBTYPE_WHEELS 0x0f + +/* For the reports that we create ourselves */ +#define CUSTOM_PAD_REPORT_ID 9 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_ANY, VID_HUION, PID_KAMVAS16_GEN3), +); + +/* + * This tablet can send reports using one of two different data formats, + * depending on what "mode" the tablet is in. + * + * By default, the tablet will send reports that can be decoded using its + * included HID descriptors (descriptors 1 and 2, shown below). + * This mode will be called "firmware mode" throughout this file. + * + * The HID descriptor that describes pen events in firmware mode (descriptor 1) + * has multiple bugs: + * * "Secondary Tip Switch" instead of "Secondary Barrel Switch" + * * "Invert" instead of (or potentially shared with) third barrel button + * * Specified tablet area of 2048 in³ instead of 293.8 x 165.2mm + * * Specified tilt range of -90 to +90 instead of -60 to +60 + * + * While these can be easily patched up by editing the descriptor, a larger + * problem with the firmware mode exists: it is impossible to tell which of the + * two wheels are being rotated (or having their central button pressed). + * + * + * By using a tool such as huion-switcher (https://github.com/whot/huion-switcher), + * the tablet can be made to send reports using a proprietary format that is not + * adequately described by its relevant descriptor (descriptor 0, shown below). + * This mode will be called "vendor mode" throughout this file. + * + * The reports sent while in vendor mode allow for proper decoding of the wheels. + * + * For simplicity and maximum functionality, this BPF focuses strictly on + * enabling one to make use of the vendor mode. + */ + +/* + * DESCRIPTORS + * DESCRIPTOR 0 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 0 + * # 0x09, 0x01, // Usage (Vendor Usage 1) 3 + * # 0xa1, 0x01, // Collection (Application) 5 + * # 0x85, 0x08, // Report ID (8) 7 + * # 0x75, 0x68, // Report Size (104) 9 + * # 0x95, 0x01, // Report Count (1) 11 + * # 0x09, 0x01, // Usage (Vendor Usage 1) 13 + * # 0x81, 0x02, // Input (Data,Var,Abs) 15 + * # 0xc0, // End Collection 17 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 18 + * # 0x09, 0x01, // Usage (Vendor Usage 1) 21 + * # 0xa1, 0x01, // Collection (Application) 23 + * # 0x85, 0x16, // Report ID (22) 25 + * # 0x75, 0x08, // Report Size (8) 27 + * # 0x95, 0x07, // Report Count (7) 29 + * # 0x09, 0x01, // Usage (Vendor Usage 1) 31 + * # 0xb1, 0x02, // Feature (Data,Var,Abs) 33 + * # 0xc0, // End Collection 35 + * # + * R: 36 06 00 ff 09 01 a1 01 85 08 75 68 95 01 09 01 81 02 c0 06 00 ff 09 01 a1 01 85 16 75 08 95 07 09 01 b1 02 c0 + * N: HUION Huion Tablet_GS1563 + * I: 3 256c 2009 + * + * + * DESCRIPTOR 1 + * # 0x05, 0x0d, // Usage Page (Digitizers) 0 + * # 0x09, 0x02, // Usage (Pen) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # 0x85, 0x0a, // Report ID (10) 6 + * # 0x09, 0x20, // Usage (Stylus) 8 + * # 0xa1, 0x01, // Collection (Application) 10 + * # 0x09, 0x42, // Usage (Tip Switch) 12 + * # 0x09, 0x44, // Usage (Barrel Switch) 14 + * # 0x09, 0x43, // Usage (Secondary Tip Switch) 16 + * # 0x09, 0x3c, // Usage (Invert) 18 + * # 0x09, 0x45, // Usage (Eraser) 20 + * # 0x15, 0x00, // Logical Minimum (0) 22 + * # 0x25, 0x01, // Logical Maximum (1) 24 + * # 0x75, 0x01, // Report Size (1) 26 + * # 0x95, 0x06, // Report Count (6) 28 + * # 0x81, 0x02, // Input (Data,Var,Abs) 30 + * # 0x09, 0x32, // Usage (In Range) 32 + * # 0x75, 0x01, // Report Size (1) 34 + * # 0x95, 0x01, // Report Count (1) 36 + * # 0x81, 0x02, // Input (Data,Var,Abs) 38 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 40 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 42 + * # 0x09, 0x30, // Usage (X) 44 + * # 0x09, 0x31, // Usage (Y) 46 + * # 0x55, 0x0d, // Unit Exponent (-3) 48 + * # 0x65, 0x33, // Unit (EnglishLinear: in³) 50 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 52 + * # 0x35, 0x00, // Physical Minimum (0) 55 + * # 0x46, 0x00, 0x08, // Physical Maximum (2048) 57 + * # 0x75, 0x10, // Report Size (16) 60 + * # 0x95, 0x02, // Report Count (2) 62 + * # 0x81, 0x02, // Input (Data,Var,Abs) 64 + * # 0x05, 0x0d, // Usage Page (Digitizers) 66 + * # 0x09, 0x30, // Usage (Tip Pressure) 68 + * # 0x26, 0xff, 0x3f, // Logical Maximum (16383) 70 + * # 0x75, 0x10, // Report Size (16) 73 + * # 0x95, 0x01, // Report Count (1) 75 + * # 0x81, 0x02, // Input (Data,Var,Abs) 77 + * # 0x09, 0x3d, // Usage (X Tilt) 79 + * # 0x09, 0x3e, // Usage (Y Tilt) 81 + * # 0x15, 0xa6, // Logical Minimum (-90) 83 + * # 0x25, 0x5a, // Logical Maximum (90) 85 + * # 0x75, 0x08, // Report Size (8) 87 + * # 0x95, 0x02, // Report Count (2) 89 + * # 0x81, 0x02, // Input (Data,Var,Abs) 91 + * # 0xc0, // End Collection 93 + * # 0xc0, // End Collection 94 + * # 0x05, 0x0d, // Usage Page (Digitizers) 95 + * # 0x09, 0x04, // Usage (Touch Screen) 97 + * # 0xa1, 0x01, // Collection (Application) 99 + * # 0x85, 0x04, // Report ID (4) 101 + * # 0x09, 0x22, // Usage (Finger) 103 + * # 0xa1, 0x02, // Collection (Logical) 105 + * # 0x05, 0x0d, // Usage Page (Digitizers) 107 + * # 0x95, 0x01, // Report Count (1) 109 + * # 0x75, 0x06, // Report Size (6) 111 + * # 0x09, 0x51, // Usage (Contact Id) 113 + * # 0x15, 0x00, // Logical Minimum (0) 115 + * # 0x25, 0x3f, // Logical Maximum (63) 117 + * # 0x81, 0x02, // Input (Data,Var,Abs) 119 + * # 0x09, 0x42, // Usage (Tip Switch) 121 + * # 0x25, 0x01, // Logical Maximum (1) 123 + * # 0x75, 0x01, // Report Size (1) 125 + * # 0x95, 0x01, // Report Count (1) 127 + * # 0x81, 0x02, // Input (Data,Var,Abs) 129 + * # 0x75, 0x01, // Report Size (1) 131 + * # 0x95, 0x01, // Report Count (1) 133 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 135 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 137 + * # 0x75, 0x10, // Report Size (16) 139 + * # 0x55, 0x0e, // Unit Exponent (-2) 141 + * # 0x65, 0x11, // Unit (SILinear: cm) 143 + * # 0x09, 0x30, // Usage (X) 145 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 147 + * # 0x35, 0x00, // Physical Minimum (0) 150 + * # 0x46, 0x15, 0x0c, // Physical Maximum (3093) 152 + * # 0x81, 0x42, // Input (Data,Var,Abs,Null) 155 + * # 0x09, 0x31, // Usage (Y) 157 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 159 + * # 0x46, 0xcb, 0x06, // Physical Maximum (1739) 162 + * # 0x81, 0x42, // Input (Data,Var,Abs,Null) 165 + * # 0x05, 0x0d, // Usage Page (Digitizers) 167 + * # 0x09, 0x30, // Usage (Tip Pressure) 169 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 171 + * # 0x75, 0x10, // Report Size (16) 174 + * # 0x95, 0x01, // Report Count (1) 176 + * # 0x81, 0x02, // Input (Data,Var,Abs) 178 + * # 0xc0, // End Collection 180 + * # 0x05, 0x0d, // Usage Page (Digitizers) 181 + * # 0x09, 0x22, // Usage (Finger) 183 + * # 0xa1, 0x02, // Collection (Logical) 185 + * # 0x05, 0x0d, // Usage Page (Digitizers) 187 + * # 0x95, 0x01, // Report Count (1) 189 + * # 0x75, 0x06, // Report Size (6) 191 + * # 0x09, 0x51, // Usage (Contact Id) 193 + * # 0x15, 0x00, // Logical Minimum (0) 195 + * # 0x25, 0x3f, // Logical Maximum (63) 197 + * # 0x81, 0x02, // Input (Data,Var,Abs) 199 + * # 0x09, 0x42, // Usage (Tip Switch) 201 + * # 0x25, 0x01, // Logical Maximum (1) 203 + * # 0x75, 0x01, // Report Size (1) 205 + * # 0x95, 0x01, // Report Count (1) 207 + * # 0x81, 0x02, // Input (Data,Var,Abs) 209 + * # 0x75, 0x01, // Report Size (1) 211 + * # 0x95, 0x01, // Report Count (1) 213 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 215 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 217 + * # 0x75, 0x10, // Report Size (16) 219 + * # 0x55, 0x0e, // Unit Exponent (-2) 221 + * # 0x65, 0x11, // Unit (SILinear: cm) 223 + * # 0x09, 0x30, // Usage (X) 225 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 227 + * # 0x35, 0x00, // Physical Minimum (0) 230 + * # 0x46, 0x15, 0x0c, // Physical Maximum (3093) 232 + * # 0x81, 0x42, // Input (Data,Var,Abs,Null) 235 + * # 0x09, 0x31, // Usage (Y) 237 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 239 + * # 0x46, 0xcb, 0x06, // Physical Maximum (1739) 242 + * # 0x81, 0x42, // Input (Data,Var,Abs,Null) 245 + * # 0x05, 0x0d, // Usage Page (Digitizers) 247 + * # 0x09, 0x30, // Usage (Tip Pressure) 249 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 251 + * # 0x75, 0x10, // Report Size (16) 254 + * # 0x95, 0x01, // Report Count (1) 256 + * # 0x81, 0x02, // Input (Data,Var,Abs) 258 + * # 0xc0, // End Collection 260 + * # 0x05, 0x0d, // Usage Page (Digitizers) 261 + * # 0x09, 0x56, // Usage (Scan Time) 263 + * # 0x55, 0x00, // Unit Exponent (0) 265 + * # 0x65, 0x00, // Unit (None) 267 + * # 0x27, 0xff, 0xff, 0xff, 0x7f, // Logical Maximum (2147483647) 269 + * # 0x95, 0x01, // Report Count (1) 274 + * # 0x75, 0x20, // Report Size (32) 276 + * # 0x81, 0x02, // Input (Data,Var,Abs) 278 + * # 0x09, 0x54, // Usage (Contact Count) 280 + * # 0x25, 0x7f, // Logical Maximum (127) 282 + * # 0x95, 0x01, // Report Count (1) 284 + * # 0x75, 0x08, // Report Size (8) 286 + * # 0x81, 0x02, // Input (Data,Var,Abs) 288 + * # 0x75, 0x08, // Report Size (8) 290 + * # 0x95, 0x08, // Report Count (8) 292 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 294 + * # 0x85, 0x05, // Report ID (5) 296 + * # 0x09, 0x55, // Usage (Contact Max) 298 + * # 0x25, 0x0a, // Logical Maximum (10) 300 + * # 0x75, 0x08, // Report Size (8) 302 + * # 0x95, 0x01, // Report Count (1) 304 + * # 0xb1, 0x02, // Feature (Data,Var,Abs) 306 + * # 0x06, 0x00, 0xff, // Usage Page (Vendor Defined Page 1) 308 + * # 0x09, 0xc5, // Usage (Vendor Usage 0xc5) 311 + * # 0x85, 0x06, // Report ID (6) 313 + * # 0x15, 0x00, // Logical Minimum (0) 315 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 317 + * # 0x75, 0x08, // Report Size (8) 320 + * # 0x96, 0x00, 0x01, // Report Count (256) 322 + * # 0xb1, 0x02, // Feature (Data,Var,Abs) 325 + * # 0xc0, // End Collection 327 + * # + * R: 328 05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 + * N: HUION Huion Tablet_GS1563 + * I: 3 256c 2009 + * + * DESCRIPTOR 2 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 0 + * # 0x09, 0x0e, // Usage (System Multi-Axis Controller) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # 0x85, 0x11, // Report ID (17) 6 + * # 0x05, 0x0d, // Usage Page (Digitizers) 8 + * # 0x09, 0x21, // Usage (Puck) 10 + * # 0xa1, 0x02, // Collection (Logical) 12 + * # 0x15, 0x00, // Logical Minimum (0) 14 + * # 0x25, 0x01, // Logical Maximum (1) 16 + * # 0x75, 0x01, // Report Size (1) 18 + * # 0x95, 0x01, // Report Count (1) 20 + * # 0xa1, 0x00, // Collection (Physical) 22 + * # 0x05, 0x09, // Usage Page (Button) 24 + * # 0x09, 0x01, // Usage (Vendor Usage 0x01) 26 + * # 0x81, 0x02, // Input (Data,Var,Abs) 28 + * # 0x05, 0x0d, // Usage Page (Digitizers) 30 + * # 0x09, 0x33, // Usage (Touch) 32 + * # 0x81, 0x02, // Input (Data,Var,Abs) 34 + * # 0x95, 0x06, // Report Count (6) 36 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 38 + * # 0xa1, 0x02, // Collection (Logical) 40 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 42 + * # 0x09, 0x37, // Usage (Dial) 44 + * # 0x16, 0x00, 0x80, // Logical Minimum (-32768) 46 + * # 0x26, 0xff, 0x7f, // Logical Maximum (32767) 49 + * # 0x75, 0x10, // Report Size (16) 52 + * # 0x95, 0x01, // Report Count (1) 54 + * # 0x81, 0x06, // Input (Data,Var,Rel) 56 + * # 0x35, 0x00, // Physical Minimum (0) 58 + * # 0x46, 0x10, 0x0e, // Physical Maximum (3600) 60 + * # 0x15, 0x00, // Logical Minimum (0) 63 + * # 0x26, 0x10, 0x0e, // Logical Maximum (3600) 65 + * # 0x09, 0x48, // Usage (Resolution Multiplier) 68 + * # 0xb1, 0x02, // Feature (Data,Var,Abs) 70 + * # 0x45, 0x00, // Physical Maximum (0) 72 + * # 0xc0, // End Collection 74 + * # 0x75, 0x08, // Report Size (8) 75 + * # 0x95, 0x01, // Report Count (1) 77 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 79 + * # 0x75, 0x08, // Report Size (8) 81 + * # 0x95, 0x01, // Report Count (1) 83 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 85 + * # 0x75, 0x08, // Report Size (8) 87 + * # 0x95, 0x01, // Report Count (1) 89 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 91 + * # 0x75, 0x08, // Report Size (8) 93 + * # 0x95, 0x01, // Report Count (1) 95 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 97 + * # 0x75, 0x08, // Report Size (8) 99 + * # 0x95, 0x01, // Report Count (1) 101 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 103 + * # 0xc0, // End Collection 105 + * # 0xc0, // End Collection 106 + * # 0xc0, // End Collection 107 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 108 + * # 0x09, 0x06, // Usage (Keyboard) 110 + * # 0xa1, 0x01, // Collection (Application) 112 + * # 0x85, 0x03, // Report ID (3) 114 + * # 0x05, 0x07, // Usage Page (Keyboard) 116 + * # 0x19, 0xe0, // Usage Minimum (224) 118 + * # 0x29, 0xe7, // Usage Maximum (231) 120 + * # 0x15, 0x00, // Logical Minimum (0) 122 + * # 0x25, 0x01, // Logical Maximum (1) 124 + * # 0x75, 0x01, // Report Size (1) 126 + * # 0x95, 0x08, // Report Count (8) 128 + * # 0x81, 0x02, // Input (Data,Var,Abs) 130 + * # 0x05, 0x07, // Usage Page (Keyboard) 132 + * # 0x19, 0x00, // Usage Minimum (0) 134 + * # 0x29, 0xff, // Usage Maximum (255) 136 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 138 + * # 0x75, 0x08, // Report Size (8) 141 + * # 0x95, 0x06, // Report Count (6) 143 + * # 0x81, 0x00, // Input (Data,Arr,Abs) 145 + * # 0xc0, // End Collection 147 + * # 0x05, 0x0c, // Usage Page (Consumer Devices) 148 + * # 0x09, 0x01, // Usage (Consumer Control) 150 + * # 0xa1, 0x01, // Collection (Application) 152 + * # 0x85, 0x04, // Report ID (4) 154 + * # 0x19, 0x01, // Usage Minimum (1) 156 + * # 0x2a, 0x9c, 0x02, // Usage Maximum (668) 158 + * # 0x15, 0x01, // Logical Minimum (1) 161 + * # 0x26, 0x9c, 0x02, // Logical Maximum (668) 163 + * # 0x95, 0x01, // Report Count (1) 166 + * # 0x75, 0x10, // Report Size (16) 168 + * # 0x81, 0x00, // Input (Data,Arr,Abs) 170 + * # 0xc0, // End Collection 172 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 173 + * # 0x09, 0x80, // Usage (System Control) 175 + * # 0xa1, 0x01, // Collection (Application) 177 + * # 0x85, 0x05, // Report ID (5) 179 + * # 0x19, 0x81, // Usage Minimum (129) 181 + * # 0x29, 0x83, // Usage Maximum (131) 183 + * # 0x15, 0x00, // Logical Minimum (0) 185 + * # 0x25, 0x01, // Logical Maximum (1) 187 + * # 0x75, 0x01, // Report Size (1) 189 + * # 0x95, 0x03, // Report Count (3) 191 + * # 0x81, 0x02, // Input (Data,Var,Abs) 193 + * # 0x95, 0x05, // Report Count (5) 195 + * # 0x81, 0x01, // Input (Cnst,Arr,Abs) 197 + * # 0xc0, // End Collection 199 + * # + * R: 200 05 01 09 0e a1 01 85 11 05 0d 09 21 a1 02 15 00 25 01 75 01 95 01 a1 00 05 09 09 01 81 02 05 0d 09 33 81 02 95 06 81 03 a1 02 05 01 09 37 16 00 80 26 ff 7f 75 10 95 01 81 06 35 00 46 10 0e 15 00 26 10 0e 09 48 b1 02 45 00 c0 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 75 08 95 01 81 01 c0 c0 c0 05 01 09 06 a1 01 85 03 05 07 19 e0 29 e7 15 00 25 01 75 01 95 08 81 02 05 07 19 00 29 ff 26 ff 00 75 08 95 06 81 00 c0 05 0c 09 01 a1 01 85 04 19 01 2a 9c 02 15 01 26 9c 02 95 01 75 10 81 00 c0 05 01 09 80 a1 01 85 05 19 81 29 83 15 00 25 01 75 01 95 03 81 02 95 05 81 01 c0 + * N: HUION Huion Tablet_GS1563 + * I: 3 256c 2009 + * + * + * + * VENDOR MODE + * HUION_FIRMWARE_ID="HUION_M22d_241101" + * HUION_MAGIC_BYTES="1403201101ac9900ff3fd81305080080083c4010" + * + * MAGIC BYTES + * [LogicalMaximum, X ] [LogicalMaximum, Y ] [LogicalMaximum, Pressure] [ LPI] + * 14 03 [ 20 11 01] [ ac 99 00] [ ff 3f] [d8 13] 05 08 00 80 08 3c 40 10 + * + * See Huion__Kamvas13Gen3.bpf.c for more details on detailed button/dial reports and caveats. It's very + * similar to the Kamvas 16 Gen 3. + */ + + +/* Filled in by udev-hid-bpf */ +char UDEV_PROP_HUION_FIRMWARE_ID[64]; + +char EXPECTED_FIRMWARE_ID[] = "HUION_M22d_"; + +__u8 last_button_state; + +static const __u8 disabled_rdesc_tablet[] = { + FixedSizeVendorReport(28) /* Input report 4 */ +}; + +static const __u8 disabled_rdesc_wheel[] = { + FixedSizeVendorReport(9) /* Input report 17 */ +}; + +static const __u8 fixed_rdesc_vendor[] = { + UsagePage_Digitizers + Usage_Dig_Pen + CollectionApplication( + ReportId(VENDOR_REPORT_ID) + UsagePage_Digitizers + Usage_Dig_Pen + CollectionPhysical( + /* + * I have only examined the tablet's behavior while using + * the PW600L pen, which does not have an eraser. + * Because of this, I don't know where the Eraser and Invert + * bits will go, or if they work as one would expect. + * + * For the time being, there is no expectation that a pen + * with an eraser will work without modifications here. + */ + ReportSize(1) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + ReportCount(3) + Usage_Dig_TipSwitch + Usage_Dig_BarrelSwitch + Usage_Dig_SecondaryBarrelSwitch + Input(Var|Abs) + PushPop( + ReportCount(1) + UsagePage_Button + Usage_i8(0x4a) /* (BTN_STYLUS3 + 1) & 0xff */ + Input(Var|Abs) + ) + ReportCount(3) + Input(Const) + ReportCount(1) + Usage_Dig_InRange + Input(Var|Abs) + ReportSize(16) + ReportCount(1) + PushPop( + UsagePage_GenericDesktop + Unit(cm) + UnitExponent(-2) + LogicalMinimum_i16(0) + PhysicalMinimum_i16(0) + /* + * The tablet has a logical maximum of 69920 x 39340 + * and a claimed resolution of 5080 LPI (200 L/mm) + * This works out to a physical maximum of + * 349.6 x 196.7mm, which matches Huion's advertised + * (rounded) active area dimensions from + * https://www.huion.com/products/pen_display/Kamvas/kamvas-16-gen-3.html + * + * The Kamvas uses data[8] for the 3rd byte of the X-axis, and adding + * that after data[2] and data[3] makes a contiguous little-endian + * 24-bit value. (See BPF_PROG below) + */ + ReportSize(24) + LogicalMaximum_i32(69920) + PhysicalMaximum_i16(3496) + Usage_GD_X + Input(Var|Abs) + ReportSize(16) + LogicalMaximum_i16(39340) + PhysicalMaximum_i16(1967) + Usage_GD_Y + Input(Var|Abs) + ) + ReportSize(16) + LogicalMinimum_i16(0) + LogicalMaximum_i16(16383) + Usage_Dig_TipPressure + Input(Var|Abs) + ReportSize(8) + ReportCount(1) + Input(Const) + ReportCount(2) + PushPop( + Unit(deg) + UnitExponent(0) + LogicalMinimum_i8(-60) + PhysicalMinimum_i8(-60) + LogicalMaximum_i8(60) + PhysicalMaximum_i8(60) + Usage_Dig_XTilt + Usage_Dig_YTilt + Input(Var|Abs) + ) + ) + ) + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + ReportId(CUSTOM_PAD_REPORT_ID) + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + /* + * The first 3 bytes are somewhat vestigial and will + * always be set to zero. Their presence here is needed + * to ensure that this device will be detected as a + * tablet pad by software that otherwise wouldn't know + * any better. + */ + /* (data[1] & 0x01) barrel switch */ + ReportSize(1) + ReportCount(1) + Usage_Dig_BarrelSwitch + Input(Var|Abs) + ReportCount(7) + Input(Const) + /* data[2] X */ + /* data[3] Y */ + ReportSize(8) + ReportCount(2) + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + Input(Var|Abs) + /* + * (data[4] & 0x01) button 1 + * (data[4] & 0x02) button 2 + * (data[4] & 0x04) button 3 + * (data[4] & 0x08) button 4 + * (data[4] & 0x10) button 5 + * (data[4] & 0x20) button 6 + * (data[4] & 0x40) button 7 (top wheel button) + * (data[4] & 0x80) button 8 (bottom wheel button) + */ + ReportSize(1) + ReportCount(8) + UsagePage_Button + UsageMinimum_i8(1) + UsageMaximum_i8(8) + Input(Var|Abs) + /* data[5] top wheel (signed, positive clockwise) */ + ReportSize(8) + ReportCount(1) + UsagePage_GenericDesktop + Usage_GD_Wheel + LogicalMinimum_i8(-1) + LogicalMaximum_i8(1) + Input(Var|Rel) + /* data[6] bottom wheel (signed, positive clockwise) */ + UsagePage_Consumer + Usage_Con_ACPan + Input(Var|Rel) + ) + /* + * The kernel will drop reports that are bigger than the + * largest report specified in the HID descriptor. + * Therefore, our modified descriptor needs to have at least one + * HID report that is as long as, or longer than, the largest + * report in the original descriptor. + * + * This macro expands to a no-op report that is padded to the + * provided length. + */ + FixedSizeVendorReport(VENDOR_REPORT_LENGTH) + ) +}; + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(hid_fix_rdesc_huion_kamvas16_gen3, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, HID_MAX_DESCRIPTOR_SIZE /* size */); + __s32 rdesc_size = hid_ctx->size; + __u8 have_fw_id; + + if (!data) + return 0; /* EPERM check */ + + have_fw_id = __builtin_memcmp(UDEV_PROP_HUION_FIRMWARE_ID, + EXPECTED_FIRMWARE_ID, + sizeof(EXPECTED_FIRMWARE_ID) - 1) == 0; + + if (have_fw_id) { + /* + * Tablet should be in vendor mode. + * Disable the unused devices + */ + if (rdesc_size == TABLET_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, disabled_rdesc_tablet, + sizeof(disabled_rdesc_tablet)); + return sizeof(disabled_rdesc_tablet); + } + + if (rdesc_size == WHEEL_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, disabled_rdesc_wheel, + sizeof(disabled_rdesc_wheel)); + return sizeof(disabled_rdesc_wheel); + } + } + + /* + * Regardless of which mode the tablet is in, always fix the vendor + * descriptor in case the udev property just happened to not be set + */ + if (rdesc_size == VENDOR_DESCRIPTOR_LENGTH) { + __builtin_memcpy(data, fixed_rdesc_vendor, sizeof(fixed_rdesc_vendor)); + return sizeof(fixed_rdesc_vendor); + } + + return 0; +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(hid_fix_event_huion_kamvas16_gen3, struct hid_bpf_ctx *hid_ctx) +{ + __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, VENDOR_REPORT_LENGTH /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* Handle vendor reports only */ + if (hid_ctx->size != VENDOR_REPORT_LENGTH) + return 0; + if (data[0] != VENDOR_REPORT_ID) + return 0; + + __u8 report_subtype = (data[1] >> 4) & 0x0f; + + if (report_subtype == VENDOR_REPORT_SUBTYPE_PEN || + report_subtype == VENDOR_REPORT_SUBTYPE_PEN_OUT) { + /* Invert Y tilt */ + data[11] = -data[11]; + + /* + * Rearrange the bytes of the report so that + * [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + * will be arranged as + * [0, 1, 2, 3, 8, 4, 5, 6, 7, 9, 10, 11, 12, 13] + */ + __u8 x_24 = data[8]; + + data[8] = data[7]; + data[7] = data[6]; + data[6] = data[5]; + data[5] = data[4]; + + data[4] = x_24; + + } else if (report_subtype == VENDOR_REPORT_SUBTYPE_BUTTONS || + report_subtype == VENDOR_REPORT_SUBTYPE_WHEELS) { + struct pad_report { + __u8 report_id; + __u8 btn_stylus:1; + __u8 padding:7; + __u8 x; + __u8 y; + __u8 buttons; + __s8 top_wheel; + __s8 bottom_wheel; + } __attribute__((packed)) *pad_report; + + __s8 top_wheel = 0; + __s8 bottom_wheel = 0; + + switch (report_subtype) { + case VENDOR_REPORT_SUBTYPE_WHEELS: + /* + * The wheel direction byte is 1 for clockwise rotation + * and 2 for counter-clockwise. + * Change it to 1 and -1, respectively. + */ + switch (data[3]) { + case 1: + top_wheel = (data[5] == 1) ? 1 : -1; + break; + case 2: + bottom_wheel = (data[5] == 1) ? 1 : -1; + break; + } + break; + + case VENDOR_REPORT_SUBTYPE_BUTTONS: + /* + * If a button is already being held, ignore any new + * button event unless it's a release. + * + * The tablet only cleanly handles one button being held + * at a time, and trying to hold multiple buttons + * (particularly wheel+pad buttons) can result in sequences + * of reports that look like imaginary presses and releases. + * + * This is an imperfect way to filter out some of these + * reports. + */ + if (last_button_state != 0x00 && data[4] != 0x00) + break; + + last_button_state = data[4]; + break; + } + + pad_report = (struct pad_report *)data; + + pad_report->report_id = CUSTOM_PAD_REPORT_ID; + pad_report->btn_stylus = 0; + pad_report->x = 0; + pad_report->y = 0; + pad_report->buttons = last_button_state; + pad_report->top_wheel = top_wheel; + pad_report->bottom_wheel = bottom_wheel; + + return sizeof(struct pad_report); + } + + return 0; +} + +HID_BPF_OPS(huion_kamvas16_gen3) = { + .hid_device_event = (void *)hid_fix_event_huion_kamvas16_gen3, + .hid_rdesc_fixup = (void *)hid_fix_rdesc_huion_kamvas16_gen3, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + switch (ctx->rdesc_size) { + case VENDOR_DESCRIPTOR_LENGTH: + case TABLET_DESCRIPTOR_LENGTH: + case WHEEL_DESCRIPTOR_LENGTH: + ctx->retval = 0; + break; + default: + ctx->retval = -EINVAL; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 029dff1c31c488e85c666d5e7e992700cd57462b Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:25 +0100 Subject: [PATCH 1069/1075] HID: bpf: Add fixup for Logitech SpaceNavigator variants The 3Dconnexion SpaceNavigator HID report descriptor declares its axis data to be "relative" when it is actually "absolute". This quirk was addressed in the kernel in 2.6.33, but some SpaceNavigator variants have a slightly different report descriptor whose axis input items are at different offsets than those assumed by the kernel fixup. Add a BPF fixup to handle both sets of offsets for known SpaceNavigator variants if the descriptor has not already been fixed by the kernel. Signed-off-by: Curran Muhlberger Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/181 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../bpf/progs/Logitech__SpaceNavigator.bpf.c | 86 +++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 drivers/hid/bpf/progs/Logitech__SpaceNavigator.bpf.c diff --git a/drivers/hid/bpf/progs/Logitech__SpaceNavigator.bpf.c b/drivers/hid/bpf/progs/Logitech__SpaceNavigator.bpf.c new file mode 100644 index 000000000000..b17719d6d9c7 --- /dev/null +++ b/drivers/hid/bpf/progs/Logitech__SpaceNavigator.bpf.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Curran Muhlberger + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include + +#define VID_LOGITECH 0x046D +#define PID_SPACENAVIGATOR 0xC626 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_ANY, VID_LOGITECH, PID_SPACENAVIGATOR) +); + +/* + * The 3Dconnexion SpaceNavigator 3D Mouse is a multi-axis controller with 6 + * axes (grouped as X,Y,Z and Rx,Ry,Rz). Axis data is absolute, but the report + * descriptor erroneously declares it to be relative. We fix the report + * descriptor to mark both axis collections as absolute. + * + * The kernel attempted to fix this in commit 24985cf68612 (HID: support + * Logitech/3DConnexion SpaceTraveler and SpaceNavigator), but the descriptor + * data offsets are incorrect for at least some SpaceNavigator units. + */ + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(hid_fix_rdesc, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */); + + if (!data) + return 0; /* EPERM check */ + + /* Offset of Input item in X,Y,Z and Rx,Ry,Rz collections for all known + * firmware variants. + * - 2009 model: X,Y,Z @ 32-33, Rx,Ry,Rz @ 49-50 (fixup originally + * applied in kernel) + * - 2016 model (size==228): X,Y,Z @ 36-37, Rx,Ry,Rz @ 53-54 + * + * The descriptor size of the 2009 model is not known, and there is evidence + * for at least two other variants (with sizes 202 & 217) besides the 2016 + * model, so we try all known offsets regardless of descriptor size. + */ + const u8 offsets[] = {32, 36, 49, 53}; + + for (size_t idx = 0; idx < ARRAY_SIZE(offsets); idx++) { + u8 offset = offsets[idx]; + + /* if Input (Data,Var,Rel) , make it Input (Data,Var,Abs) */ + if (data[offset] == 0x81 && data[offset + 1] == 0x06) + data[offset + 1] = 0x02; + } + + return 0; +} + +HID_BPF_OPS(logitech_spacenavigator) = { + .hid_rdesc_fixup = (void *)hid_fix_rdesc, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + /* Ensure report descriptor size matches one of the known variants. */ + if (ctx->rdesc_size != 202 && + ctx->rdesc_size != 217 && + ctx->rdesc_size != 228) { + ctx->retval = -EINVAL; + return 0; + } + + /* Check whether the kernel has already applied the fix. */ + if ((ctx->rdesc[32] == 0x81 && ctx->rdesc[33] == 0x02 && + ctx->rdesc[49] == 0x81 && ctx->rdesc[50] == 0x02) || + (ctx->rdesc[36] == 0x81 && ctx->rdesc[37] == 0x02 && + ctx->rdesc[53] == 0x81 && ctx->rdesc[54] == 0x02)) + ctx->retval = -EINVAL; + else + ctx->retval = 0; + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 040adbe80135af3de7d924ea39db4bfabf57b66f Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:26 +0100 Subject: [PATCH 1070/1075] HID: bpf: Add support for the Waltop Batteryless Tablet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several bugs as outlined in udev-hid-bpf#66 and udev-hid-bpf!198: - pressure curve is far from linear - tilt range is ±60, not ±127 - pressing the second button sets both tip down and barrel switch Fix the second button by adding a Secondary Barrel Switch in the existing padding and check for the tip down/barrel switch down combo. When both values become true at the same time, set the Secondary Barrel Switch instead. Implement a custom pressure curve that maps the hardware range 0-102 linearly to the logical range 0-1224, and maps the hardware range 103-2047 logarithmically to the logical range 1232-2047. This mapping isn’t perfect, but it’s way more natural than the stock configuration. Signed-off-by: Peter Hutterer Signed-off-by: Jan Felix Langenbach Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/200 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../progs/WALTOP__Batteryless-Tablet.bpf.c | 321 ++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 drivers/hid/bpf/progs/WALTOP__Batteryless-Tablet.bpf.c diff --git a/drivers/hid/bpf/progs/WALTOP__Batteryless-Tablet.bpf.c b/drivers/hid/bpf/progs/WALTOP__Batteryless-Tablet.bpf.c new file mode 100644 index 000000000000..156d75af516d --- /dev/null +++ b/drivers/hid/bpf/progs/WALTOP__Batteryless-Tablet.bpf.c @@ -0,0 +1,321 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Red Hat + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include + +#define VID_WALTOP 0x172F +#define PID_BATTERYLESS_TABLET 0x0505 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_ANY, VID_WALTOP, PID_BATTERYLESS_TABLET) +); + +#define EXPECTED_RDESC_SIZE 335 +#define PEN_REPORT_ID 16 + +#define TIP_SWITCH BIT(0) +#define BARREL_SWITCH BIT(1) +#define SECONDARY_BARREL_SWITCH BIT(5) + +static __u8 last_button_state; + +static const __u8 fixed_rdesc[] = { + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x02, // Usage (Mouse) + 0xa1, 0x01, // Collection (Application) + 0x85, 0x01, // Report ID (1) + 0x09, 0x01, // Usage (Pointer) + 0xa1, 0x00, // Collection (Physical) + 0x05, 0x09, // Usage Page (Button) + 0x19, 0x01, // Usage Minimum (1) + 0x29, 0x05, // Usage Maximum (5) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x75, 0x01, // Report Size (1) + 0x95, 0x05, // Report Count (5) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x75, 0x03, // Report Size (3) + 0x95, 0x01, // Report Count (1) + 0x81, 0x03, // Input (Cnst,Var,Abs) + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x30, // Usage (X) + 0x09, 0x31, // Usage (Y) + 0x09, 0x38, // Usage (Wheel) + 0x15, 0x81, // Logical Minimum (-127) + 0x25, 0x7f, // Logical Maximum (127) + 0x75, 0x08, // Report Size (8) + 0x95, 0x03, // Report Count (3) + 0x81, 0x06, // Input (Data,Var,Rel) + 0x05, 0x0c, // Usage Page (Consumer) + 0x15, 0x81, // Logical Minimum (-127) + 0x25, 0x7f, // Logical Maximum (127) + 0x75, 0x08, // Report Size (8) + 0x95, 0x01, // Report Count (1) + 0x0a, 0x38, 0x02, // Usage (AC Pan) + 0x81, 0x06, // Input (Data,Var,Rel) + 0xc0, // End Collection + 0xc0, // End Collection + 0x05, 0x0d, // Usage Page (Digitizers) + 0x09, 0x02, // Usage (Pen) + 0xa1, 0x01, // Collection (Application) + 0x85, 0x02, // Report ID (2) + 0x09, 0x20, // Usage (Stylus) + 0xa1, 0x00, // Collection (Physical) + 0x09, 0x00, // Usage (0x0000) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x00, // Logical Maximum (255) + 0x75, 0x08, // Report Size (8) + 0x95, 0x09, // Report Count (9) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x09, 0x3f, // Usage (Azimuth) + 0x09, 0x40, // Usage (Altitude) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x00, // Logical Maximum (255) + 0x75, 0x08, // Report Size (8) + 0x95, 0x02, // Report Count (2) + 0xb1, 0x02, // Feature (Data,Var,Abs) + 0xc0, // End Collection + 0x85, 0x05, // Report ID (5) + 0x05, 0x0d, // Usage Page (Digitizers) + 0x09, 0x20, // Usage (Stylus) + 0xa1, 0x00, // Collection (Physical) + 0x09, 0x00, // Usage (0x0000) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x00, // Logical Maximum (255) + 0x75, 0x08, // Report Size (8) + 0x95, 0x07, // Report Count (7) + 0x81, 0x02, // Input (Data,Var,Abs) + 0xc0, // End Collection + 0x85, 0x0a, // Report ID (10) + 0x05, 0x0d, // Usage Page (Digitizers) + 0x09, 0x20, // Usage (Stylus) + 0xa1, 0x00, // Collection (Physical) + 0x09, 0x00, // Usage (0x0000) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x00, // Logical Maximum (255) + 0x75, 0x08, // Report Size (8) + 0x95, 0x07, // Report Count (7) + 0x81, 0x02, // Input (Data,Var,Abs) + 0xc0, // End Collection + 0x85, 0x10, // Report ID (16) + 0x09, 0x20, // Usage (Stylus) + 0xa1, 0x00, // Collection (Physical) + 0x09, 0x42, // Usage (Tip Switch) + 0x09, 0x44, // Usage (Barrel Switch) + 0x09, 0x3c, // Usage (Invert) + 0x09, 0x45, // Usage (Eraser) + 0x09, 0x32, // Usage (In Range) + 0x09, 0x5a, // Usage (Secondary Barrel Switch) <-- added + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x75, 0x01, // Report Size (1) + 0x95, 0x06, // Report Count (6) <--- changed from 5 + 0x81, 0x02, // Input (Data,Var,Abs) + 0x95, 0x02, // Report Count (2) <--- changed from 3 + 0x81, 0x03, // Input (Cnst,Var,Abs) + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x30, // Usage (X) + 0x75, 0x10, // Report Size (16) + 0x95, 0x01, // Report Count (1) + 0xa4, // Push + 0x55, 0x0d, // Unit Exponent (-3) + 0x65, 0x33, // Unit (EnglishLinear: in³) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0x00, 0x7d, // Logical Maximum (32000) + 0x35, 0x00, // Physical Minimum (0) + 0x46, 0x00, 0x7d, // Physical Maximum (32000) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x09, 0x31, // Usage (Y) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0x20, 0x4e, // Logical Maximum (20000) + 0x35, 0x00, // Physical Minimum (0) + 0x46, 0x20, 0x4e, // Physical Maximum (20000) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x05, 0x0d, // Usage Page (Digitizers) + 0x09, 0x30, // Usage (Tip Pressure) + 0x15, 0x00, // Logical Minimum (0) + 0x26, 0xff, 0x07, // Logical Maximum (2047) + 0x35, 0x00, // Physical Minimum (0) + 0x46, 0xff, 0x07, // Physical Maximum (2047) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x05, 0x0d, // Usage Page (Digitizers) + 0x09, 0x3d, // Usage (X Tilt) + 0x09, 0x3e, // Usage (Y Tilt) + 0x15, 0xc4, // Logical Minimum (-60) <- changed from -127 + 0x25, 0x3c, // Logical Maximum (60) <- changed from 127 + 0x75, 0x08, // Report Size (8) + 0x95, 0x02, // Report Count (2) + 0x81, 0x02, // Input (Data,Var,Abs) + 0xc0, // End Collection + 0xc0, // End Collection + 0x05, 0x01, // Usage Page (Generic Desktop) + 0x09, 0x06, // Usage (Keyboard) + 0xa1, 0x01, // Collection (Application) + 0x85, 0x0d, // Report ID (13) + 0x05, 0x07, // Usage Page (Keyboard/Keypad) + 0x19, 0xe0, // Usage Minimum (224) + 0x29, 0xe7, // Usage Maximum (231) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x75, 0x01, // Report Size (1) + 0x95, 0x08, // Report Count (8) + 0x81, 0x02, // Input (Data,Var,Abs) + 0x75, 0x08, // Report Size (8) + 0x95, 0x01, // Report Count (1) + 0x81, 0x01, // Input (Cnst,Arr,Abs) + 0x05, 0x07, // Usage Page (Keyboard/Keypad) + 0x19, 0x00, // Usage Minimum (0) + 0x29, 0x65, // Usage Maximum (101) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x65, // Logical Maximum (101) + 0x75, 0x08, // Report Size (8) + 0x95, 0x05, // Report Count (5) + 0x81, 0x00, // Input (Data,Arr,Abs) + 0xc0, // End Collection + 0x05, 0x0c, // Usage Page (Consumer) + 0x09, 0x01, // Usage (Consumer Control) + 0xa1, 0x01, // Collection (Application) + 0x85, 0x0c, // Report ID (12) + 0x09, 0xe9, // Usage (Volume Increment) + 0x09, 0xea, // Usage (Volume Decrement) + 0x09, 0xe2, // Usage (Mute) + 0x15, 0x00, // Logical Minimum (0) + 0x25, 0x01, // Logical Maximum (1) + 0x75, 0x01, // Report Size (1) + 0x95, 0x03, // Report Count (3) + 0x81, 0x06, // Input (Data,Var,Rel) + 0x75, 0x05, // Report Size (5) + 0x95, 0x01, // Report Count (1) + 0x81, 0x07, // Input (Cnst,Var,Rel) + 0xc0, // End Collection +}; + +static inline unsigned int bitwidth32(__u32 x) +{ + return 32 - __builtin_clzg(x, 32); +} + +static inline unsigned int floor_log2_32(__u32 x) +{ + return bitwidth32(x) - 1; +} + +/* Maps the interval [0, 2047] to itself using a scaled + * approximation of the function log2(x+1). + */ +static unsigned int scaled_log2(__u16 v) +{ + const unsigned int XMAX = 2047; + const unsigned int YMAX = 11; /* log2(2048) = 11 */ + + unsigned int x = v + 1; + unsigned int n = floor_log2_32(x); + unsigned int b = 1 << n; + + /* Fixed-point fraction in [0, 1), linearly + * interpolated using delta-y = 1 and + * delta-x = (2b - b) = b. + */ + unsigned int frac = (x - b) << YMAX; + unsigned int lerp = frac / b; + unsigned int log2 = (n << YMAX) + lerp; + + return ((log2 * XMAX) / YMAX) >> YMAX; +} + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(hid_fix_rdesc, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 4096 /* size */); + + if (!data) + return 0; /* EPERM check */ + + __builtin_memcpy(data, fixed_rdesc, sizeof(fixed_rdesc)); + + return sizeof(fixed_rdesc); +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(waltop_fix_events, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 10 /* size */); + + if (!data) + return 0; /* EPERM check */ + + __u8 report_id = data[0]; + + if (report_id != PEN_REPORT_ID) + return 0; + + /* On this tablet if the secondary barrel switch is pressed, + * the tablet sends tip down and barrel down. Change this to + * just secondary barrel down when there is no ambiguity. + * + * It's possible that there is a bug in the firmware and the + * device intends to set invert + eraser instead (i.e. the + * pysical button is an eraser button) but since + * the pressure is always zero, said eraser button + * would be useless anyway. + * + * So let's just change the button to secondary barrel down. + */ + + __u8 tip_switch = data[1] & TIP_SWITCH; + __u8 barrel_switch = data[1] & BARREL_SWITCH; + + __u8 tip_held = last_button_state & TIP_SWITCH; + __u8 barrel_held = last_button_state & BARREL_SWITCH; + + if (tip_switch && barrel_switch && !tip_held && !barrel_held) { + data[1] &= ~(TIP_SWITCH | BARREL_SWITCH); /* release tip and barrel */ + data[1] |= SECONDARY_BARREL_SWITCH; /* set secondary barrel switch */ + } + + last_button_state = data[1]; + + /* The pressure sensor on this tablet maps around half of the + * logical pressure range into the interval [0-100]. Further + * pressure causes the sensor value to increase exponentially + * up to a maximum value of 2047. + * + * The values 12 and 102 were chosen to have an integer slope + * with smooth transition between the two curves around the + * value 100. + */ + + __u16 pressure = (((__u16)data[6]) << 0) | (((__u16)data[7]) << 8); + + if (pressure <= 102) + pressure *= 12; + else + pressure = scaled_log2(pressure); + + data[6] = pressure >> 0; + data[7] = pressure >> 8; + + return 0; +} + +HID_BPF_OPS(waltop_batteryless) = { + .hid_device_event = (void *)waltop_fix_events, + .hid_rdesc_fixup = (void *)hid_fix_rdesc, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + if (ctx->rdesc_size == EXPECTED_RDESC_SIZE) + ctx->retval = 0; + else + ctx->retval = -EINVAL; + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 8ba327d50263a026a41cc891fc8c09689c4b95e2 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:27 +0100 Subject: [PATCH 1071/1075] HID: bpf: Add support for the XP-Pen Deco 01 V3 This device needs a fix for the tilt range on the pen report descriptor and the usual conversion of the pad keys from the firmware's hardcoded keyboard shortcuts to actual pad buttons. Signed-off-by: Peter Hutterer Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/185 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/bpf/progs/XPPen__Deco01V3.bpf.c | 305 ++++++++++++++++++++ 1 file changed, 305 insertions(+) create mode 100644 drivers/hid/bpf/progs/XPPen__Deco01V3.bpf.c diff --git a/drivers/hid/bpf/progs/XPPen__Deco01V3.bpf.c b/drivers/hid/bpf/progs/XPPen__Deco01V3.bpf.c new file mode 100644 index 000000000000..2502fcc9ede6 --- /dev/null +++ b/drivers/hid/bpf/progs/XPPen__Deco01V3.bpf.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Red Hat + */ + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include "hid_report_helpers.h" +#include + +#define VID_UGEE 0x28BD /* VID is shared with SinoWealth and Glorious and prob others */ +#define PID_DECO_01_V3 0x0947 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, VID_UGEE, PID_DECO_01_V3), +); + +/* + * Default report descriptor reports: + * - a report descriptor for the pad buttons, reported as key sequences + * - a report descriptor for the pen + * - a vendor-specific report descriptor + * + * The Pad report descriptor, see + * https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/issues/54 + * + * # Report descriptor length: 102 bytes + * 0x05, 0x01, // Usage Page (Generic Desktop) 0 + * 0x09, 0x02, // Usage (Mouse) 2 + * 0xa1, 0x01, // Collection (Application) 4 + * 0x85, 0x09, // Report ID (9) 6 + * 0x09, 0x01, // Usage (Pointer) 8 + * 0xa1, 0x00, // Collection (Physical) 10 + * 0x05, 0x09, // Usage Page (Button) 12 + * 0x19, 0x01, // UsageMinimum (1) 14 + * 0x29, 0x03, // UsageMaximum (3) 16 + * 0x15, 0x00, // Logical Minimum (0) 18 + * 0x25, 0x01, // Logical Maximum (1) 20 + * 0x95, 0x03, // Report Count (3) 22 + * 0x75, 0x01, // Report Size (1) 24 + * 0x81, 0x02, // Input (Data,Var,Abs) 26 + * 0x95, 0x05, // Report Count (5) 28 + * 0x81, 0x01, // Input (Cnst,Arr,Abs) 30 + * 0x05, 0x01, // Usage Page (Generic Desktop) 32 + * 0x09, 0x30, // Usage (X) 34 + * 0x09, 0x31, // Usage (Y) 36 + * 0x26, 0xff, 0x7f, // Logical Maximum (32767) 38 + * 0x95, 0x02, // Report Count (2) 41 + * 0x75, 0x10, // Report Size (16) 43 + * 0x81, 0x02, // Input (Data,Var,Abs) 45 + * 0x05, 0x0d, // Usage Page (Digitizers) 47 + * 0x09, 0x30, // Usage (Tip Pressure) 49 + * 0x26, 0xff, 0x07, // Logical Maximum (2047) 51 + * 0x95, 0x01, // Report Count (1) 54 + * 0x75, 0x10, // Report Size (16) 56 + * 0x81, 0x02, // Input (Data,Var,Abs) 58 + * 0xc0, // End Collection 60 + * 0xc0, // End Collection 61 + * 0x05, 0x01, // Usage Page (Generic Desktop) 62 + * 0x09, 0x06, // Usage (Keyboard) 64 + * 0xa1, 0x01, // Collection (Application) 66 + * 0x85, 0x06, // Report ID (6) 68 + * 0x05, 0x07, // Usage Page (Keyboard/Keypad) 70 + * 0x19, 0xe0, // UsageMinimum (224) 72 + * 0x29, 0xe7, // UsageMaximum (231) 74 + * 0x15, 0x00, // Logical Minimum (0) 76 + * 0x25, 0x01, // Logical Maximum (1) 78 + * 0x75, 0x01, // Report Size (1) 80 + * 0x95, 0x08, // Report Count (8) 82 + * 0x81, 0x02, // Input (Data,Var,Abs) 84 + * 0x05, 0x07, // Usage Page (Keyboard/Keypad) 86 + * 0x19, 0x00, // UsageMinimum (0) 88 + * 0x29, 0xff, // UsageMaximum (255) 90 + * 0x26, 0xff, 0x00, // Logical Maximum (255) 92 + * 0x75, 0x08, // Report Size (8) 95 + * 0x95, 0x06, // Report Count (6) 97 + * 0x81, 0x00, // Input (Data,Arr,Abs) 99 + * 0xc0, // End Collection 101 + * + * And key events for buttons top->bottom are: + * Buttons released: 06 00 00 00 00 00 00 00 + * Button1: 06 00 05 00 00 00 00 00 -> b + * Button2: 06 00 08 00 00 00 00 00 -> e + * Button3: 06 04 00 00 00 00 00 00 -> LAlt + * Button4: 06 00 2c 00 00 00 00 00 -> Space + * Button5: 06 01 16 00 00 00 00 00 -> LControl + s + * Button6: 06 01 1d 00 00 00 00 00 -> LControl + z + * Button7: 06 01 57 00 00 00 00 00 -> LControl + Keypad Plus + * Button8: 06 01 56 00 00 00 00 00 -> LControl + Keypad Dash + * + * When multiple buttons are pressed at the same time, the values used to + * identify the buttons are identical, but they appear in different bytes of the + * record. For example, when button 2 (0x08) and button 1 (0x05) are pressed, + * this is the report: + * + * Buttons 2 and 1: 06 00 08 05 00 00 00 00 -> e + b + * + * Buttons 1, 2, 4, 5 and 6 can be matched by finding their values in the + * report. + * + * Button 3 is pressed when the 3rd bit is 1. For example, pressing buttons 3 + * and 5 generates this report: + * + * Buttons 3 and 5: 06 05 16 00 00 00 00 00 -> LControl + LAlt + s + * -- -- + * | | + * | `- Button 5 (0x16) + * `- 0x05 = 0101. Button 3 is pressed + * ^ + * + * pad_buttons contains a list of buttons that can be matched in + * HID_BPF_DEVICE_EVENT. Button 3 as it has a dedicated bit. + * + * + * The Pen report descriptor announces a wrong tilt range: + * + * Report descriptor length: 109 bytes + * 0x05, 0x0d, // Usage Page (Digitizers) 0 + * 0x09, 0x02, // Usage (Pen) 2 + * 0xa1, 0x01, // Collection (Application) 4 + * 0x85, 0x07, // Report ID (7) 6 + * 0x09, 0x20, // Usage (Stylus) 8 + * 0xa1, 0x01, // Collection (Application) 10 + * 0x09, 0x42, // Usage (Tip Switch) 12 + * 0x09, 0x44, // Usage (Barrel Switch) 14 + * 0x09, 0x45, // Usage (Eraser) 16 + * 0x09, 0x3c, // Usage (Invert) 18 + * 0x15, 0x00, // Logical Minimum (0) 20 + * 0x25, 0x01, // Logical Maximum (1) 22 + * 0x75, 0x01, // Report Size (1) 24 + * 0x95, 0x04, // Report Count (4) 26 + * 0x81, 0x02, // Input (Data,Var,Abs) 28 + * 0x95, 0x01, // Report Count (1) 30 + * 0x81, 0x03, // Input (Cnst,Var,Abs) 32 + * 0x09, 0x32, // Usage (In Range) 34 + * 0x95, 0x01, // Report Count (1) 36 + * 0x81, 0x02, // Input (Data,Var,Abs) 38 + * 0x95, 0x02, // Report Count (2) 40 + * 0x81, 0x03, // Input (Cnst,Var,Abs) 42 + * 0x75, 0x10, // Report Size (16) 44 + * 0x95, 0x01, // Report Count (1) 46 + * 0x35, 0x00, // Physical Minimum (0) 48 + * 0xa4, // Push 50 + * 0x05, 0x01, // Usage Page (Generic Desktop) 51 + * 0x09, 0x30, // Usage (X) 53 + * 0x65, 0x13, // Unit (EnglishLinear: in) 55 + * 0x55, 0x0d, // Unit Exponent (-3) 57 + * 0x46, 0x10, 0x27, // Physical Maximum (10000) 59 + * 0x26, 0xff, 0x7f, // Logical Maximum (32767) 62 + * 0x81, 0x02, // Input (Data,Var,Abs) 65 + * 0x09, 0x31, // Usage (Y) 67 + * 0x46, 0x6a, 0x18, // Physical Maximum (6250) 69 + * 0x26, 0xff, 0x7f, // Logical Maximum (32767) 72 + * 0x81, 0x02, // Input (Data,Var,Abs) 75 + * 0xb4, // Pop 77 + * 0x09, 0x30, // Usage (X) 78 + * 0x45, 0x00, // Physical Maximum (0) 80 + * 0x26, 0xff, 0x3f, // Logical Maximum (16383) 82 + * 0x81, 0x42, // Input (Data,Var,Abs,Null) 85 + * 0x09, 0x3d, // Usage (Start) 87 + * 0x15, 0x81, // Logical Minimum (-127) 89 <- Change from -127 to -60 + * 0x25, 0x7f, // Logical Maximum (127) 91 <- Change from 127 to 60 + * 0x75, 0x08, // Report Size (8) 93 + * 0x95, 0x01, // Report Count (1) 95 + * 0x81, 0x02, // Input (Data,Var,Abs) 97 + * 0x09, 0x3e, // Usage (Select) 99 + * 0x15, 0x81, // Logical Minimum (-127) 101 <- Change from -127 to -60 + * 0x25, 0x7f, // Logical Maximum (127) 103 <- Change from 127 to 60 + * 0x81, 0x02, // Input (Data,Var,Abs) 105 + * 0xc0, // End Collection 107 + * 0xc0, // End Collection 108 + */ + +#define PEN_REPORT_DESCRIPTOR_LENGTH 109 +#define PAD_REPORT_DESCRIPTOR_LENGTH 102 +#define PAD_REPORT_LENGTH 8 +#define PAD_REPORT_ID 6 +#define PAD_NUM_BUTTONS 8 + +static const __u8 fixed_rdesc_pad[] = { + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + // Byte 0 in report is the report ID + ReportId(PAD_REPORT_ID) + ReportCount(1) + ReportSize(8) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + // Byte 1 is the button state + UsagePage_Button + UsageMinimum_i8(0x01) + UsageMaximum_i8(PAD_NUM_BUTTONS) + LogicalMinimum_i8(0x0) + LogicalMaximum_i8(0x1) + ReportCount(PAD_NUM_BUTTONS) + ReportSize(1) + Input(Var|Abs) + // Byte 2 in report - just exists so we get to be a tablet pad + UsagePage_Digitizers + Usage_Dig_BarrelSwitch // BTN_STYLUS + ReportCount(1) + ReportSize(1) + Input(Var|Abs) + ReportCount(7) // padding + Input(Const) + // Bytes 3/4 in report - just exists so we get to be a tablet pad + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + ReportCount(2) + ReportSize(8) + Input(Var|Abs) + // Byte 5-7 are padding so we match the original report lengtth + ReportCount(3) + ReportSize(8) + Input(Const) + ) + ) +}; + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(xppen_deco01v3_rdesc_fixup, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, HID_MAX_DESCRIPTOR_SIZE /* size */); + + const __u8 wrong_logical_range[] = {0x15, 0x81, 0x25, 0x7f}; + const __u8 correct_logical_range[] = {0x15, 0xc4, 0x25, 0x3c}; + + if (!data) + return 0; /* EPERM check */ + + switch (hctx->size) { + case PAD_REPORT_DESCRIPTOR_LENGTH: + __builtin_memcpy(data, fixed_rdesc_pad, sizeof(fixed_rdesc_pad)); + return sizeof(fixed_rdesc_pad); + case PEN_REPORT_DESCRIPTOR_LENGTH: + if (__builtin_memcmp(&data[89], wrong_logical_range, + sizeof(wrong_logical_range)) == 0) + __builtin_memcpy(&data[89], correct_logical_range, + sizeof(correct_logical_range)); + if (__builtin_memcmp(&data[101], wrong_logical_range, + sizeof(wrong_logical_range)) == 0) + __builtin_memcpy(&data[101], correct_logical_range, + sizeof(correct_logical_range)); + break; + } + + return 0; +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(xppen_deco01v3_device_event, struct hid_bpf_ctx *hctx) +{ + static const __u8 pad_buttons[] = { 0x05, 0x08, 0x00, 0x2c, 0x16, 0x1d, 0x57, 0x56 }; + __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, PAD_REPORT_LENGTH /* size */); + + if (!data) + return 0; /* EPERM check */ + + if (data[0] == PAD_REPORT_ID) { + __u8 button_mask = 0; + size_t d, b; + + /* data[1] stores the status of BTN_2 in the 3rd bit*/ + if (data[1] & BIT(2)) + button_mask |= BIT(2); + + /* The rest of the descriptor stores the buttons as in pad_buttons */ + for (d = 2; d < 8; d++) { + for (b = 0; b < sizeof(pad_buttons); b++) { + if (data[d] != 0 && data[d] == pad_buttons[b]) + button_mask |= BIT(b); + } + } + + __u8 report[8] = {PAD_REPORT_ID, button_mask, 0x00}; + + __builtin_memcpy(data, report, sizeof(report)); + } + return 0; +} + +HID_BPF_OPS(xppen_deco01v3) = { + .hid_rdesc_fixup = (void *)xppen_deco01v3_rdesc_fixup, + .hid_device_event = (void *)xppen_deco01v3_device_event, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + switch (ctx->rdesc_size) { + case PAD_REPORT_DESCRIPTOR_LENGTH: + case PEN_REPORT_DESCRIPTOR_LENGTH: + ctx->retval = 0; + break; + default: + ctx->retval = -EINVAL; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; From 88b5468f2cc564d532999c2fa068e158f5123691 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:28 +0100 Subject: [PATCH 1072/1075] HID: bpf: Add support for XP-Pen Deco02 Modifies report to have tablet buttons report as buttons, rather than as keyboard key combinations. The dial is also converted to a relative input, using the dedicated bit previously reserved for modifier key information. Signed-off-by: Hannah Pittman Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/203 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/bpf/progs/XPPen__Deco02.bpf.c | 359 ++++++++++++++++++++++ 1 file changed, 359 insertions(+) create mode 100644 drivers/hid/bpf/progs/XPPen__Deco02.bpf.c diff --git a/drivers/hid/bpf/progs/XPPen__Deco02.bpf.c b/drivers/hid/bpf/progs/XPPen__Deco02.bpf.c new file mode 100644 index 000000000000..4b2549031e56 --- /dev/null +++ b/drivers/hid/bpf/progs/XPPen__Deco02.bpf.c @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include "hid_bpf.h" +#include "hid_bpf_helpers.h" +#include "hid_report_helpers.h" +#include + +#define VID_UGEE 0x28BD +#define PID_DECO_02 0x0803 + +HID_BPF_CONFIG( + HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, VID_UGEE, PID_DECO_02), +); + +/* + * Devices are: + * - Pad input, including pen (This is the only one we are interested in) + * - Pen input as mouse + * - Vendor + * + * Descriptors on main device are: + * - 7: Pen + * - 6: Vendor settings? Unclear + * - 3: Keyboard (This is what we want to modify) + * - 5: Feature report + * + * This creates three event nodes: + * - XP-PEN DECO 02 Stylus + * - XP-PEN DECO 02 + * - XP-PEN DECO 02 Keyboard (Again, what we want to modify) + * + * # Report descriptor length: 188 bytes + * # 0x05, 0x0d, // Usage Page (Digitizers) 0 + * # 0x09, 0x02, // Usage (Pen) 2 + * # 0xa1, 0x01, // Collection (Application) 4 + * # 0x85, 0x07, // Report ID (7) 6 + * # 0x09, 0x20, // Usage (Stylus) 8 + * # 0xa1, 0x00, // Collection (Physical) 10 + * # 0x09, 0x42, // Usage (Tip Switch) 12 + * # 0x09, 0x44, // Usage (Barrel Switch) 14 + * # 0x09, 0x45, // Usage (Eraser) 16 + * # 0x09, 0x3c, // Usage (Invert) 18 + * # 0x09, 0x32, // Usage (In Range) 20 + * # 0x15, 0x00, // Logical Minimum (0) 22 + * # 0x25, 0x01, // Logical Maximum (1) 24 + * # 0x75, 0x01, // Report Size (1) 26 + * # 0x95, 0x05, // Report Count (5) 28 + * # 0x81, 0x02, // Input (Data,Var,Abs) 30 + * # 0x95, 0x03, // Report Count (3) 32 + * # 0x81, 0x03, // Input (Cnst,Var,Abs) 34 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 36 + * # 0x09, 0x30, // Usage (X) 38 + * # 0x15, 0x00, // Logical Minimum (0) 40 + * # 0x26, 0x50, 0x57, // Logical Maximum (22352) 42 + * # 0x55, 0x0d, // Unit Exponent (-3) 45 + * # 0x65, 0x13, // Unit (EnglishLinear: in) 47 + * # 0x35, 0x00, // Physical Minimum (0) 49 + * # 0x46, 0x50, 0x57, // Physical Maximum (22352) 51 + * # 0x75, 0x10, // Report Size (16) 54 + * # 0x95, 0x01, // Report Count (1) 56 + * # 0x81, 0x02, // Input (Data,Var,Abs) 58 + * # 0x09, 0x31, // Usage (Y) 60 + * # 0x15, 0x00, // Logical Minimum (0) 62 + * # 0x26, 0x92, 0x36, // Logical Maximum (13970) 64 + * # 0x55, 0x0d, // Unit Exponent (-3) 67 + * # 0x65, 0x13, // Unit (EnglishLinear: in) 69 + * # 0x35, 0x00, // Physical Minimum (0) 71 + * # 0x46, 0x92, 0x36, // Physical Maximum (13970) 73 + * # 0x75, 0x10, // Report Size (16) 76 + * # 0x95, 0x01, // Report Count (1) 78 + * # 0x81, 0x02, // Input (Data,Var,Abs) 80 + * # 0x05, 0x0d, // Usage Page (Digitizers) 82 + * # 0x09, 0x30, // Usage (Tip Pressure) 84 + * # 0x15, 0x00, // Logical Minimum (0) 86 + * # 0x26, 0xff, 0x1f, // Logical Maximum (8191) 88 + * # 0x75, 0x10, // Report Size (16) 91 + * # 0x95, 0x01, // Report Count (1) 93 + * # 0x81, 0x02, // Input (Data,Var,Abs) 95 + * # 0xc0, // End Collection 97 + * # 0xc0, // End Collection 98 + * # 0x09, 0x0e, // Usage (Device Configuration) 99 + * # 0xa1, 0x01, // Collection (Application) 101 + * # 0x85, 0x05, // Report ID (5) 103 + * # 0x09, 0x23, // Usage (Device Settings) 105 + * # 0xa1, 0x02, // Collection (Logical) 107 + * # 0x09, 0x52, // Usage (Inputmode) 109 + * # 0x09, 0x53, // Usage (Device Index) 111 + * # 0x25, 0x0a, // Logical Maximum (10) 113 + * # 0x75, 0x08, // Report Size (8) 115 + * # 0x95, 0x02, // Report Count (2) 117 + * # 0xb1, 0x02, // Feature (Data,Var,Abs) 119 + * # 0xc0, // End Collection 121 + * # 0xc0, // End Collection 122 + * # 0x05, 0x0c, // Usage Page (Consumer Devices) 123 + * # 0x09, 0x36, // Usage (Function Buttons) 125 + * # 0xa1, 0x00, // Collection (Physical) 127 + * # 0x85, 0x06, // Report ID (6) 129 + * # 0x05, 0x09, // Usage Page (Button) 131 + * # 0x19, 0x01, // Usage Minimum (1) 133 + * # 0x29, 0x20, // Usage Maximum (32) 135 + * # 0x15, 0x00, // Logical Minimum (0) 137 + * # 0x25, 0x01, // Logical Maximum (1) 139 + * # 0x95, 0x20, // Report Count (32) 141 + * # 0x75, 0x01, // Report Size (1) 143 + * # 0x81, 0x02, // Input (Data,Var,Abs) 145 + * # 0xc0, // End Collection 147 + * # 0x05, 0x01, // Usage Page (Generic Desktop) 148 + * # 0x09, 0x06, // Usage (Keyboard) 150 + * # 0xa1, 0x01, // Collection (Application) 152 + * # 0x85, 0x03, // Report ID (3) 154 + * # 0x05, 0x07, // Usage Page (Keyboard) 156 + * # 0x19, 0xe0, // Usage Minimum (224) 158 + * # 0x29, 0xe7, // Usage Maximum (231) 160 + * # 0x15, 0x00, // Logical Minimum (0) 162 + * # 0x25, 0x01, // Logical Maximum (1) 164 + * # 0x75, 0x01, // Report Size (1) 166 + * # 0x95, 0x08, // Report Count (8) 168 + * # 0x81, 0x02, // Input (Data,Var,Abs) 170 + * # 0x05, 0x07, // Usage Page (Keyboard) 172 + * # 0x19, 0x00, // Usage Minimum (0) 174 + * # 0x29, 0xff, // Usage Maximum (255) 176 + * # 0x26, 0xff, 0x00, // Logical Maximum (255) 178 + * # 0x75, 0x08, // Report Size (8) 181 + * # 0x95, 0x06, // Report Count (6) 183 + * # 0x81, 0x00, // Input (Data,Arr,Abs) 185 + * # 0xc0, // End Collection 187 + * + * Key events; top to bottom: + * Buttons released: 03 00 00 00 00 00 00 00 + * Button1: 03 00 05 00 00 00 00 00 -> 'b and B' + * Button2: 03 00 2c 00 00 00 00 00 -> 'Spacebar' + * Button3: 03 00 08 00 00 00 00 00 -> 'e and E' + * Button4: 03 00 0c 00 00 00 00 00 -> 'i and I' + * Button5: 03 05 1d 00 00 00 00 00 -> LeftControl + LeftAlt + 'z and Z' + * Button6: 03 01 16 00 00 00 00 00 -> LeftControl + 's and S' + * + * Dial Events: + * Clockwise: 03 01 2e 00 00 00 00 00 -> LeftControl + '= and +' + * Anticlockwise: 03 01 2d 00 00 00 00 00 -> LeftControl + '- and (underscore)' + * + * NOTE: Input event descriptions begin at byte 2, and progressively build + * towards byte 7 as each new key is pressed maintaining the press order. + * For example: + * BTN1 followed by BTN2 is 03 00 05 2c 00 00 00 00 + * BTN2 followed by BTN1 is 03 00 2c 05 00 00 00 00 + * + * Releasing a button causes its byte to be freed, and the next item in the list + * is pushed forwards. Dial events are released immediately after an event is + * registered (i.e. after each "click"), so will continually appear pushed + * backwards in the report. + * + * When a button with a modifier key is pressed, the button identifier stacks in + * an abnormal way, where the highest modifier byte always supersedes others. + * In these cases, the button with the higher modifier is always last. + * For example: + * BTN6 followed by BTN5 is 03 05 1d 16 00 00 00 00 + * BTN5 followed by BTN6 is 03 05 1d 16 00 00 00 00 + * BTN5 followed by BTN1 is 03 05 05 1d 00 00 00 00 + * + * For three button presses in order, demonstrating strictly above rules: + * BTN6, BTN1, BTN5 is 03 05 05 1d 16 00 00 00 + * BTN5, BTN1, BTN6 is 03 05 05 1d 16 00 00 00 + * + * In short, when BTN5/6 are pressed, the order of operations is lost, as they + * will always float to the end when pressed in combination with others. + * + * Fortunately, all states are recorded in the same way, with no overlaps. + * Byte 1 can be used as a spare for the wheel, since this is for mod keys. + */ + +#define RDESC_SIZE_PAD 188 +#define REPORT_SIZE_PAD 8 +#define REPORT_ID_BUTTONS 3 +#define PAD_BUTTON_COUNT 6 +#define RDESC_KEYBOARD_OFFSET 148 + +static const __u8 fixed_rdesc_pad[] = { + /* Copy of pen descriptor to avoid losing functionality */ + UsagePage_Digitizers + Usage_Dig_Pen + CollectionApplication( + ReportId(7) + Usage_Dig_Stylus + CollectionPhysical( + Usage_Dig_TipSwitch + Usage_Dig_BarrelSwitch + Usage_Dig_Eraser + Usage_Dig_Invert + Usage_Dig_InRange + LogicalMinimum_i8(0) + LogicalMaximum_i8(1) + ReportSize(1) + ReportCount(5) + Input(Var|Abs) + ReportCount(3) + Input(Const) /* Input (Const, Var, Abs) */ + UsagePage_GenericDesktop + Usage_GD_X + LogicalMinimum_i16(0) + LogicalMaximum_i16(22352) + UnitExponent(-3) + Unit(in) /* (EnglishLinear: in) */ + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(22352) + ReportSize(16) + ReportCount(1) + Input(Var|Abs) + Usage_GD_Y + LogicalMinimum_i16(0) + LogicalMaximum_i16(13970) + UnitExponent(-3) + Unit(in) /* (EnglishLinear: in) */ + PhysicalMinimum_i16(0) + PhysicalMaximum_i16(13970) + ReportSize(16) + ReportCount(1) + Input(Var|Abs) + UsagePage_Digitizers + Usage_Dig_TipPressure + LogicalMinimum_i16(0) + LogicalMaximum_i16(8191) + ReportSize(16) + ReportCount(1) + Input(Var|Abs) + ) + ) + + /* FIXES BEGIN */ + UsagePage_GenericDesktop + Usage_GD_Keypad + CollectionApplication( + ReportId(REPORT_ID_BUTTONS) /* Retain original ID on byte 0 */ + ReportCount(1) + ReportSize(REPORT_SIZE_PAD) + UsagePage_Digitizers + Usage_Dig_TabletFunctionKeys + CollectionPhysical( + /* Byte 1: Dial state */ + UsagePage_GenericDesktop + Usage_GD_Dial + LogicalMinimum_i8(-1) + LogicalMaximum_i8(1) + ReportCount(1) + ReportSize(REPORT_SIZE_PAD) + Input(Var|Rel) + /* Byte 2: Button state */ + UsagePage_Button + ReportSize(1) + ReportCount(PAD_BUTTON_COUNT) + UsageMinimum_i8(0x01) + UsageMaximum_i8(PAD_BUTTON_COUNT) /* Number of buttons */ + LogicalMinimum_i8(0x0) + LogicalMaximum_i8(0x1) + Input(Var|Abs) + /* Byte 3: Exists to be tablet pad */ + UsagePage_Digitizers + Usage_Dig_BarrelSwitch + ReportCount(1) + ReportSize(1) + Input(Var|Abs) + ReportCount(7) /* Padding, to fill full report space */ + Input(Const) + /* Byte 4/5: Exists to be a tablet pad */ + UsagePage_GenericDesktop + Usage_GD_X + Usage_GD_Y + ReportCount(2) + ReportSize(8) + Input(Var|Abs) + /* Bytes 6/7: Padding, to match original length */ + ReportCount(2) + ReportSize(8) + Input(Const) + ) + FixedSizeVendorReport(RDESC_SIZE_PAD) + ) +}; + +SEC(HID_BPF_RDESC_FIXUP) +int BPF_PROG(xppen_deco02_rdesc_fixup, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0, HID_MAX_DESCRIPTOR_SIZE); + + if (!data) + return 0; /* EPERM Check */ + + if (hctx->size == RDESC_SIZE_PAD) { + __builtin_memcpy(data, fixed_rdesc_pad, sizeof(fixed_rdesc_pad)); + return sizeof(fixed_rdesc_pad); + } + + return 0; +} + +SEC(HID_BPF_DEVICE_EVENT) +int BPF_PROG(xppen_deco02_device_event, struct hid_bpf_ctx *hctx) +{ + __u8 *data = hid_bpf_get_data(hctx, 0, REPORT_SIZE_PAD); + + if (!data || data[0] != REPORT_ID_BUTTONS) + return 0; /* EPERM or wrong report */ + + __u8 dial_code = 0; + __u8 button_mask = 0; + size_t d; + + /* Start from 2; 0 is report ID, 1 is modifier keys, replaced by dial */ + for (d = 2; d < 8; d++) { + switch (data[d]) { + case 0x2e: + dial_code = 1; + break; + case 0x2d: + dial_code = -1; + break; + /* below are buttons, top to bottom */ + case 0x05: + button_mask |= BIT(0); + break; + case 0x2c: + button_mask |= BIT(1); + break; + case 0x08: + button_mask |= BIT(2); + break; + case 0x0c: + button_mask |= BIT(3); + break; + case 0x1d: + button_mask |= BIT(4); + break; + case 0x16: + button_mask |= BIT(05); + break; + default: + break; + } + } + + __u8 report[8] = { REPORT_ID_BUTTONS, dial_code, button_mask, 0x00 }; + + __builtin_memcpy(data, report, sizeof(report)); + return 0; +} + +HID_BPF_OPS(xppen_deco02) = { + .hid_rdesc_fixup = (void *)xppen_deco02_rdesc_fixup, + .hid_device_event = (void *)xppen_deco02_device_event, +}; + +SEC("syscall") +int probe(struct hid_bpf_probe_args *ctx) +{ + ctx->retval = ctx->rdesc_size != RDESC_SIZE_PAD ? -EINVAL : 0; + return 0; +} + +char _license[] SEC("license") = "GPL"; From 5e3e8f1b44e420800f62c993878bf6ad43adb038 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:29 +0100 Subject: [PATCH 1073/1075] HID: bpf: add heuristics to the Huion Inspiroy 2S eraser button When pressing the phsyical eraser button (remapped by us to the Secondary Barrel Switch) while the tip is down, the device gives us several false reports with a Tip Switch 0: press| |release SBS: [0 0 ... 1 1 1 ... 1 0 0 0 0 0 0 ...] TS: [1 1 ... 1 0 1 ... 1 1 0 0 0 1 1 ...] In both press/release the number of Tip Switch 0 reports can be up to 4 and *sometimes* the Tip Switch is released in the same report as the button press/release event. Paper over this by forcing the tip down for a few reports if it was down before the button toggled. Signed-off-by: Peter Hutterer Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/195 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- .../hid/bpf/progs/Huion__Inspiroy-2-S.bpf.c | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/hid/bpf/progs/Huion__Inspiroy-2-S.bpf.c b/drivers/hid/bpf/progs/Huion__Inspiroy-2-S.bpf.c index 13f64fb49800..79453362bf97 100644 --- a/drivers/hid/bpf/progs/Huion__Inspiroy-2-S.bpf.c +++ b/drivers/hid/bpf/progs/Huion__Inspiroy-2-S.bpf.c @@ -163,6 +163,9 @@ char EXPECTED_FIRMWARE_ID[] = "HUION_T21j_"; __u8 last_button_state; +__u8 last_tip_state; +__u8 last_sec_barrel_state; +__u8 force_tip_down_count; static const __u8 fixed_rdesc_pad[] = { UsagePage_GenericDesktop @@ -522,9 +525,31 @@ int BPF_PROG(inspiroy_2_fix_events, struct hid_bpf_ctx *hctx) pad_report->wheel = wheel; return sizeof(struct pad_report); - } + } else if (data[1] & 0x80) { /* Pen reports with InRange 1 */ + __u8 tip_state = data[1] & 0x1; + __u8 sec_barrel_state = data[1] & 0x4; - /* Pen reports need nothing done */ + if (force_tip_down_count > 0) { + data[1] |= 0x1; + --force_tip_down_count; + if (tip_state) + force_tip_down_count = 0; + } + + /* Tip was down and we just pressed or released the + * secondary barrel switch (the physical eraser + * button). The device will send up to 4 + * reports with Tip Switch 0 and sometimes + * this report has Tip Switch 0. + */ + if (last_tip_state && + last_sec_barrel_state != sec_barrel_state) { + force_tip_down_count = 4; + data[1] |= 0x1; + } + last_tip_state = tip_state; + last_sec_barrel_state = sec_barrel_state; + } } return 0; From 71570e8fb760027842c0e748c669d5bf87dfba65 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:30 +0100 Subject: [PATCH 1074/1075] HID: bpf: add the Huion Kamvas 27 Pro Same issues with a secondary tip switch instead of secondary barrel switch as the Kamvas 19. Copy the stable Kamvas 19 support back into testing and add the vid/pid for the Kamvas 27. Signed-off-by: Peter Hutterer Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/189 Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/bpf/progs/Huion__Kamvas-Pro-19.bpf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/hid/bpf/progs/Huion__Kamvas-Pro-19.bpf.c b/drivers/hid/bpf/progs/Huion__Kamvas-Pro-19.bpf.c index 489cb4fcc2cd..5f43e4071848 100644 --- a/drivers/hid/bpf/progs/Huion__Kamvas-Pro-19.bpf.c +++ b/drivers/hid/bpf/progs/Huion__Kamvas-Pro-19.bpf.c @@ -9,12 +9,15 @@ #define VID_HUION 0x256C #define PID_KAMVAS_PRO_19 0x006B +#define PID_KAMVAS_PRO_27 0x006c #define NAME_KAMVAS_PRO_19 "HUION Huion Tablet_GT1902" +#define NAME_KAMVAS_PRO_27 "HUION Huion Tablet_GT2701" #define TEST_PREFIX "uhid test " HID_BPF_CONFIG( HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, VID_HUION, PID_KAMVAS_PRO_19), + HID_DEVICE(BUS_USB, HID_GROUP_MULTITOUCH_WIN_8, VID_HUION, PID_KAMVAS_PRO_27), ); bool prev_was_out_of_range; @@ -351,7 +354,8 @@ int probe(struct hid_bpf_probe_args *ctx) if (!__builtin_memcmp(name, TEST_PREFIX, sizeof(TEST_PREFIX) - 1)) name += sizeof(TEST_PREFIX) - 1; - if (__builtin_memcmp(name, NAME_KAMVAS_PRO_19, sizeof(NAME_KAMVAS_PRO_19))) + if (__builtin_memcmp(name, NAME_KAMVAS_PRO_19, sizeof(NAME_KAMVAS_PRO_19)) && + __builtin_memcmp(name, NAME_KAMVAS_PRO_27, sizeof(NAME_KAMVAS_PRO_27))) ctx->retval = -EINVAL; hid_bpf_release_context(hctx); From 3b86c87f8dcbb8ba3b00d7adf7ccfff086f0f23e Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Tue, 18 Nov 2025 18:16:31 +0100 Subject: [PATCH 1075/1075] HID: bpf: fix typo in HID usage table We could go to the USB consortium, but it's probably easier that way. And update HID usage table json generated file from https://usb.org/sites/default/files/hut1_6.pdf updated: 01/30/2025 Reported-by: Colin Ian King Link: https://gitlab.freedesktop.org/libevdev/udev-hid-bpf/-/merge_requests/191 Signed-off-by: Benjamin Tissoires Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/bpf/progs/hid_report_helpers.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/hid/bpf/progs/hid_report_helpers.h b/drivers/hid/bpf/progs/hid_report_helpers.h index 9b2a48e4a311..9944ff54d31d 100644 --- a/drivers/hid/bpf/progs/hid_report_helpers.h +++ b/drivers/hid/bpf/progs/hid_report_helpers.h @@ -143,8 +143,11 @@ * report with Report ID 0xac of the given size in bytes. * The size is inclusive of the 1 byte Report ID prefix. * - * HID-BPF requires that at least one report has - * the same size as the original report from the device. + * The kernel discards any HID reports that are larger + * than the largest report in a HID report descriptor. + * Thus at least one report must have (at least) + * the same size as the largest original report from + * the device. * The easy way to ensure that is to add this * macro as the last element of your CollectionApplication * other reports can be of any size less than this. @@ -295,6 +298,7 @@ #define Usage_GD_SystemSpeakerMute Usage_i8(0xa7) #define Usage_GD_SystemHibernate Usage_i8(0xa8) #define Usage_GD_SystemMicrophoneMute Usage_i8(0xa9) +#define Usage_GD_SystemAccessibilityBinding Usage_i8(0xaa) #define Usage_GD_SystemDisplayInvert Usage_i8(0xb0) #define Usage_GD_SystemDisplayInternal Usage_i8(0xb1) #define Usage_GD_SystemDisplayExternal Usage_i8(0xb2) @@ -2669,7 +2673,7 @@ #define Usage_BS_iDeviceName Usage_i8(0x88) #define Usage_BS_iDeviceChemistry Usage_i8(0x89) #define Usage_BS_ManufacturerData Usage_i8(0x8a) -#define Usage_BS_Rechargable Usage_i8(0x8b) +#define Usage_BS_Rechargeable Usage_i8(0x8b) #define Usage_BS_WarningCapacityLimit Usage_i8(0x8c) #define Usage_BS_CapacityGranularity1 Usage_i8(0x8d) #define Usage_BS_CapacityGranularity2 Usage_i8(0x8e)