From b14bb2e7821bdd133afeb5e623fd6c5a2273ecf6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Sep 2025 12:50:26 +0300 Subject: [PATCH 001/263] tee: qcom: prevent potential off by one read Re-order these checks to check if "i" is a valid array index before using it. This prevents a potential off by one read access. Fixes: d6e290837e50 ("tee: add Qualcomm TEE driver") Signed-off-by: Dan Carpenter Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/call.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/qcomtee/call.c b/drivers/tee/qcomtee/call.c index cc17a48d0ab7..ac134452cc9c 100644 --- a/drivers/tee/qcomtee/call.c +++ b/drivers/tee/qcomtee/call.c @@ -308,7 +308,7 @@ static int qcomtee_params_from_args(struct tee_param *params, } /* Release any IO and OO objects not processed. */ - for (; u[i].type && i < num_params; i++) { + for (; i < num_params && u[i].type; i++) { if (u[i].type == QCOMTEE_ARG_TYPE_OO || u[i].type == QCOMTEE_ARG_TYPE_IO) qcomtee_object_put(u[i].o); From a9ee2c461e5c361545f0c45e9f149159ba369c64 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Sep 2025 12:50:41 +0300 Subject: [PATCH 002/263] tee: qcom: return -EFAULT instead of -EINVAL if copy_from_user() fails If copy_from_user() fails, the correct error code is -EFAULT, not -EINVAL. Signed-off-by: Dan Carpenter Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tee/qcomtee/core.c b/drivers/tee/qcomtee/core.c index 783acc59cfa9..b6715ada7700 100644 --- a/drivers/tee/qcomtee/core.c +++ b/drivers/tee/qcomtee/core.c @@ -424,7 +424,7 @@ static int qcomtee_prepare_msg(struct qcomtee_object_invoke_ctx *oic, if (!(u[i].flags & QCOMTEE_ARG_FLAGS_UADDR)) memcpy(msgptr, u[i].b.addr, u[i].b.size); else if (copy_from_user(msgptr, u[i].b.uaddr, u[i].b.size)) - return -EINVAL; + return -EFAULT; offset += qcomtee_msg_offset_align(u[i].b.size); ib++; From f12b69d8f22824a07f17c1399c99757072de73e0 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 27 Sep 2025 19:39:08 +0200 Subject: [PATCH 003/263] batman-adv: Release references to inactive interfaces Trying to dump the originators or the neighbors via netlink for a meshif with an inactive primary interface is not allowed. The dump functions were checking this correctly but they didn't handle non-existing primary interfaces and existing _inactive_ interfaces differently. (Primary) batadv_hard_ifaces hold a references to a net_device. And accessing them is only allowed when either being in a RCU/spinlock protected section or when holding a valid reference to them. The netlink dump functions use the latter. But because the missing specific error handling for inactive primary interfaces, the reference was never dropped. This reference counting error was only detected when the interface should have been removed from the system: unregister_netdevice: waiting for batadv_slave_0 to become free. Usage count = 2 Cc: stable@vger.kernel.org Fixes: 6ecc4fd6c2f4 ("batman-adv: netlink: reduce duplicate code by returning interfaces") Reported-by: syzbot+881d65229ca4f9ae8c84@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index a464ff96b929..ed89d7fd1e7f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -764,11 +764,16 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); @@ -1333,11 +1338,16 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) bat_priv = netdev_priv(mesh_iface); primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + if (!primary_if) { ret = -ENOENT; goto out_put_mesh_iface; } + if (primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out_put_primary_if; + } + hard_iface = batadv_netlink_get_hardif(bat_priv, cb); if (IS_ERR(hard_iface) && PTR_ERR(hard_iface) != -ENONET) { ret = PTR_ERR(hard_iface); From 3b63efa21bc6acc1a0fadd1dd0f0e1988a4c0177 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 30 Sep 2025 16:44:27 +0200 Subject: [PATCH 004/263] tee: QCOMTEE should depend on ARCH_QCOM The Qualcomm Trusted Execution Environment (QTEE) is only available on Qualcomm SoCs. Hence add a dependency on ARCH_QCOM, to prevent asking the user about this driver when configuring a kernel without Qualcomm platform support. Fixes: d6e290837e50f73f ("tee: add Qualcomm TEE driver") Signed-off-by: Geert Uytterhoeven Reviewed-by: Konrad Dybcio Signed-off-by: Jens Wiklander --- drivers/tee/qcomtee/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tee/qcomtee/Kconfig b/drivers/tee/qcomtee/Kconfig index 927686abceb1..9f19dee08db4 100644 --- a/drivers/tee/qcomtee/Kconfig +++ b/drivers/tee/qcomtee/Kconfig @@ -2,6 +2,7 @@ # Qualcomm Trusted Execution Environment Configuration config QCOMTEE tristate "Qualcomm TEE Support" + depends on ARCH_QCOM || COMPILE_TEST depends on !CPU_BIG_ENDIAN select QCOM_SCM select QCOM_TZMEM_MODE_SHMBRIDGE From 4092fc5f35cecb01d59b2cdf7740b203eac6948a Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 1 Oct 2025 19:31:12 +0100 Subject: [PATCH 005/263] spi: dt-bindings: cadence: add soc-specific compatible strings for zynqmp and versal-net When the binding for the Cadence spi controller was written, a dedicated compatible was added for the zynq device. Later when zynqmp and versal-net, which also use this spi controller IP, were added they did not receive soc-specific compatibles. Add them now, with a fallback to the existing compatible for the r1p6 version of the IP so that there will be no functional change. Retain the r1p6 in the string, to match what was done for zynq. Disallow the cdns,spi-r1p6 compatible in isolation to "encourage" people to actually add soc-specific compatible strings in the future. Signed-off-by: Conor Dooley Acked-by: Michal Simek Link: https://patch.msgid.link/20251001-basics-grafting-a1a214ef65ac@spud Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/spi-cadence.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.yaml b/Documentation/devicetree/bindings/spi/spi-cadence.yaml index 8de96abe9da1..27414b78d61d 100644 --- a/Documentation/devicetree/bindings/spi/spi-cadence.yaml +++ b/Documentation/devicetree/bindings/spi/spi-cadence.yaml @@ -14,9 +14,14 @@ allOf: properties: compatible: - enum: - - cdns,spi-r1p6 - - xlnx,zynq-spi-r1p6 + oneOf: + - enum: + - xlnx,zynq-spi-r1p6 + - items: + - enum: + - xlnx,zynqmp-spi-r1p6 + - xlnx,versal-net-spi-r1p6 + - const: cdns,spi-r1p6 reg: maxItems: 1 From ee795e82e10197c070efd380dc9615c73dffad6c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 3 Oct 2025 13:42:39 +0200 Subject: [PATCH 006/263] spi: rockchip-sfc: Fix DMA-API usage Use DMA-API dma_map_single() call for getting the DMA address of the transfer buffer instead of hacking with virt_to_phys(). This fixes the following DMA-API debug warning: ------------[ cut here ]------------ DMA-API: rockchip-sfc fe300000.spi: device driver tries to sync DMA memory it has not allocated [device address=0x000000000cf70000] [size=288 bytes] WARNING: kernel/dma/debug.c:1106 at check_sync+0x1d8/0x690, CPU#2: systemd-udevd/151 Modules linked in: ... Hardware name: Hardkernel ODROID-M1 (DT) pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : check_sync+0x1d8/0x690 lr : check_sync+0x1d8/0x690 .. Call trace: check_sync+0x1d8/0x690 (P) debug_dma_sync_single_for_cpu+0x84/0x8c __dma_sync_single_for_cpu+0x88/0x234 rockchip_sfc_exec_mem_op+0x4a0/0x798 [spi_rockchip_sfc] spi_mem_exec_op+0x408/0x498 spi_nor_read_data+0x170/0x184 spi_nor_read_sfdp+0x74/0xe4 spi_nor_parse_sfdp+0x120/0x11f0 spi_nor_sfdp_init_params_deprecated+0x3c/0x8c spi_nor_scan+0x690/0xf88 spi_nor_probe+0xe4/0x304 spi_mem_probe+0x6c/0xa8 spi_probe+0x94/0xd4 really_probe+0xbc/0x298 ... Fixes: b69386fcbc60 ("spi: rockchip-sfc: Using normal memory for dma") Signed-off-by: Marek Szyprowski Link: https://patch.msgid.link/20251003114239.431114-1-m.szyprowski@samsung.com Signed-off-by: Mark Brown --- drivers/spi/spi-rockchip-sfc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-rockchip-sfc.c b/drivers/spi/spi-rockchip-sfc.c index 9eba5c0a60f2..b3c2b03b1153 100644 --- a/drivers/spi/spi-rockchip-sfc.c +++ b/drivers/spi/spi-rockchip-sfc.c @@ -704,7 +704,12 @@ static int rockchip_sfc_probe(struct platform_device *pdev) ret = -ENOMEM; goto err_dma; } - sfc->dma_buffer = virt_to_phys(sfc->buffer); + sfc->dma_buffer = dma_map_single(dev, sfc->buffer, + sfc->max_iosize, DMA_BIDIRECTIONAL); + if (dma_mapping_error(dev, sfc->dma_buffer)) { + ret = -ENOMEM; + goto err_dma_map; + } } ret = devm_spi_register_controller(dev, host); @@ -715,6 +720,9 @@ static int rockchip_sfc_probe(struct platform_device *pdev) return 0; err_register: + dma_unmap_single(dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); +err_dma_map: free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); err_dma: pm_runtime_get_sync(dev); @@ -736,6 +744,8 @@ static void rockchip_sfc_remove(struct platform_device *pdev) struct spi_controller *host = sfc->host; spi_unregister_controller(host); + dma_unmap_single(&pdev->dev, sfc->dma_buffer, sfc->max_iosize, + DMA_BIDIRECTIONAL); free_pages((unsigned long)sfc->buffer, get_order(sfc->max_iosize)); clk_disable_unprepare(sfc->clk); From 2e9c1da4ee9d0acfca2e0a3d78f3d8cb5802da1b Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 26 Sep 2025 21:56:56 +0200 Subject: [PATCH 007/263] wifi: ath10k: Fix memory leak on unsupported WMI command ath10k_wmi_cmd_send takes ownership of the passed buffer (skb) and has the responsibility to release it in case of error. This patch fixes missing free in case of early error due to unhandled WMI command ID. Tested-on: WCN3990 hw1.0 WLAN.HL.3.3.7.c2-00931-QCAHLSWMTPLZ-1 Fixes: 553215592f14 ("ath10k: warn if give WMI command is not supported") Suggested-by: Jeff Johnson Signed-off-by: Loic Poulain Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250926195656.187970-1-loic.poulain@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath10k/wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index cb8ae751eb31..b4aad6604d6d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1938,6 +1938,7 @@ int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id) if (cmd_id == WMI_CMD_UNSUPPORTED) { ath10k_warn(ar, "wmi command %d is not supported by firmware\n", cmd_id); + dev_kfree_skb_any(skb); return ret; } From 0eb002c93c3b47f88244cecb1e356eaeab61a6bf Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 29 Sep 2025 15:21:35 -0400 Subject: [PATCH 008/263] wifi: ath11k: Add missing platform IDs for quirk table Lenovo platforms can come with one of two different IDs. The pm_quirk table was missing the second ID for each platform. Add missing ID and some extra platform identification comments. Reported on https://bugzilla.kernel.org/show_bug.cgi?id=219196 Tested-on: P14s G4 AMD. Fixes: ce8669a27016 ("wifi: ath11k: determine PM policy based on machine model") Signed-off-by: Mark Pearson Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219196 Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250929192146.1789648-1-mpearson-lenovo@squebb.ca Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 54 +++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index d49353b6b2e7..e9618432cd2f 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -912,42 +912,84 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { static const struct dmi_system_id ath11k_pm_quirk_table[] = { { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* X13 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21J3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* X13 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21J4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14 G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K3"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14 G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K4"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K5"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K6"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T16 G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K7"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T16 G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21K8"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* P16s G2 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21K9"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* P16s G2 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21KA"), }, }, { .driver_data = (void *)ATH11K_PM_WOW, - .matches = { + .matches = { /* T14s G4 AMD #1 */ + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21F8"), + }, + }, + { + .driver_data = (void *)ATH11K_PM_WOW, + .matches = { /* T14s G4 AMD #2 */ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "21F9"), }, From 92282074e1d2e7b6da5c05fe38a7cc974187fe14 Mon Sep 17 00:00:00 2001 From: Karthik M Date: Tue, 23 Sep 2025 15:03:16 -0700 Subject: [PATCH 009/263] wifi: ath12k: free skb during idr cleanup callback ath12k just like ath11k [1] did not handle skb cleanup during idr cleanup callback. Both ath12k_mac_vif_txmgmt_idr_remove() and ath12k_mac_tx_mgmt_pending_free() performed idr cleanup and DMA unmapping for skb but only ath12k_mac_tx_mgmt_pending_free() freed skb. As a result, during vdev deletion a memory leak occurs. Refactor all clean up steps into a new function. New function ath12k_mac_tx_mgmt_free() creates a centralized area where idr cleanup, DMA unmapping for skb and freeing skb is performed. Utilize skb pointer given by idr_remove(), instead of passed as a function argument because IDR will be protected by locking. This will prevent concurrent modification of the same IDR. Now ath12k_mac_tx_mgmt_pending_free() and ath12k_mac_vif_txmgmt_idr_remove() call ath12k_mac_tx_mgmt_free(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.4.1-00199-QCAHKSWPL_SILICONZ-1 Link: https://lore.kernel.org/r/1637832614-13831-1-git-send-email-quic_srirrama@quicinc.com > # [1] Fixes: d889913205cf ("wifi: ath12k: driver for Qualcomm Wi-Fi 7 devices") Signed-off-by: Karthik M Signed-off-by: Muna Sinada Reviewed-by: Vasanthakumar Thiagarajan Reviewed-by: Baochen Qiang Link: https://patch.msgid.link/20250923220316.1595758-1-muna.sinada@oss.qualcomm.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 34 ++++++++++++++------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 3a3965b79942..2fad2df1d6ce 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8304,23 +8304,32 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb) wake_up(&ar->txmgmt_empty_waitq); } -int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +static void ath12k_mac_tx_mgmt_free(struct ath12k *ar, int buf_id) { - struct sk_buff *msdu = skb; + struct sk_buff *msdu; struct ieee80211_tx_info *info; - struct ath12k *ar = ctx; - struct ath12k_base *ab = ar->ab; spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); + msdu = idr_remove(&ar->txmgmt_idr, buf_id); spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, + + if (!msdu) + return; + + dma_unmap_single(ar->ab->dev, ATH12K_SKB_CB(msdu)->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); - ath12k_mgmt_over_wmi_tx_drop(ar, skb); + ath12k_mgmt_over_wmi_tx_drop(ar, msdu); +} + +int ath12k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +{ + struct ath12k *ar = ctx; + + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -8329,17 +8338,10 @@ static int ath12k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx) { struct ieee80211_vif *vif = ctx; struct ath12k_skb_cb *skb_cb = ATH12K_SKB_CB(skb); - struct sk_buff *msdu = skb; struct ath12k *ar = skb_cb->ar; - struct ath12k_base *ab = ar->ab; - if (skb_cb->vif == vif) { - spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); - spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, - DMA_TO_DEVICE); - } + if (skb_cb->vif == vif) + ath12k_mac_tx_mgmt_free(ar, buf_id); return 0; } From 9c78e747dd4fee6c36fcc926212e20032055cf9d Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Fri, 3 Oct 2025 14:51:58 +0530 Subject: [PATCH 010/263] wifi: ath11k: avoid bit operation on key flags Bitwise operations with WMI_KEY_PAIRWISE (defined as 0) are ineffective and misleading. This results in pairwise key validations added in commit 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") to always evaluate false and clear key commands for pairwise keys are not honored. Since firmware supports overwriting the new key without explicitly clearing the previous one, there is no visible impact currently. However, to restore consistency with the previous behavior and improve clarity, replace bitwise operations with direct assignments and comparisons for key flags. Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.9.0.1-02146-QCAHKSWPL_SILICONZ-1 Tested-on: WCN6855 hw2.1 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3.6510.41 Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-wireless/aLlaetkalDvWcB7b@stanley.mountain Fixes: 97acb0259cc9 ("wifi: ath11k: fix group data packet drops during rekey") Signed-off-by: Rameshkumar Sundaram Reviewed-by: Vasanthakumar Thiagarajan Link: https://patch.msgid.link/20251003092158.1080637-1-rameshkumar.sundaram@oss.qualcomm.com [update copyright per current guidance] Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/mac.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 106e2530b64e..0e41b5a91d66 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2025 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ #include @@ -4417,9 +4417,9 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) - flags |= WMI_KEY_PAIRWISE; + flags = WMI_KEY_PAIRWISE; else - flags |= WMI_KEY_GROUP; + flags = WMI_KEY_GROUP; ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "%s for peer %pM on vdev %d flags 0x%X, type = %d, num_sta %d\n", @@ -4456,7 +4456,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, is_ap_with_no_sta = (vif->type == NL80211_IFTYPE_AP && !arvif->num_stations); - if ((flags & WMI_KEY_PAIRWISE) || cmd == SET_KEY || is_ap_with_no_sta) { + if (flags == WMI_KEY_PAIRWISE || cmd == SET_KEY || is_ap_with_no_sta) { ret = ath11k_install_key(arvif, key, cmd, peer_addr, flags); if (ret) { ath11k_warn(ab, "ath11k_install_key failed (%d)\n", ret); @@ -4470,7 +4470,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, goto exit; } - if ((flags & WMI_KEY_GROUP) && cmd == SET_KEY && is_ap_with_no_sta) + if (flags == WMI_KEY_GROUP && cmd == SET_KEY && is_ap_with_no_sta) arvif->reinstall_group_keys = true; } From 18a5f1af596e6ba22cd40ada449063041f3ce6d4 Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Tue, 7 Oct 2025 13:11:33 +0300 Subject: [PATCH 011/263] spi: dw-mmio: add error handling for reset_control_deassert() Currently reset_control_deassert() is called without checking its return value. This can lead to silent failures when reset deassertion fails. Add proper error handling to: 1. Check the return value of reset_control_deassert() 2. Return the error to the caller 3. Provide meaningful error message using dev_err_probe() This ensures that reset-related failures are properly reported during probe and helps with debugging reset issues. Signed-off-by: Artem Shimko Link: https://patch.msgid.link/20251007101134.1912895-1-a.shimko.dev@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-dw-mmio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index f0f576fac77a..7a5197586919 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -358,7 +358,9 @@ static int dw_spi_mmio_probe(struct platform_device *pdev) if (IS_ERR(dwsmmio->rstc)) return PTR_ERR(dwsmmio->rstc); - reset_control_deassert(dwsmmio->rstc); + ret = reset_control_deassert(dwsmmio->rstc); + if (ret) + return dev_err_probe(&pdev->dev, ret, "Failed to deassert resets\n"); dws->bus_num = pdev->id; From 268eb6fb908bc82ce479e4dba9a2cad11f536c9c Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Fri, 19 Sep 2025 14:25:34 +0800 Subject: [PATCH 012/263] dt-bindings: usb: dwc3-imx8mp: dma-range is required only for imx8mp Only i.MX8MP need dma-range property to let USB controller work properly. Remove dma-range from required list and add limitation for imx8mp. Fixes: d2a704e29711 ("dt-bindings: usb: dwc3-imx8mp: add imx8mp dwc3 glue bindings") Cc: stable Reviewed-by: Jun Li Signed-off-by: Xu Yang Reviewed-by: Frank Li Acked-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml index baf130669c38..73e7a60a0060 100644 --- a/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/fsl,imx8mp-dwc3.yaml @@ -89,13 +89,21 @@ required: - reg - "#address-cells" - "#size-cells" - - dma-ranges - ranges - clocks - clock-names - interrupts - power-domains +allOf: + - if: + properties: + compatible: + const: fsl,imx8mp-dwc3 + then: + required: + - dma-ranges + additionalProperties: false examples: From 2758246d287549e1088eae350654160cbf4d424f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 29 Sep 2025 20:28:50 +0200 Subject: [PATCH 013/263] usb: dwc3: Don't call clk_bulk_disable_unprepare() twice devm_clk_bulk_get_all_enabled() is used in the probe, so clk_bulk_disable_unprepare() should not be called explicitly in the remove function. Fixes: e0b6dc00c701 ("usb: dwc3: add generic driver to support flattened") Signed-off-by: Christophe JAILLET Acked-by: Thinh Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-generic-plat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-generic-plat.c b/drivers/usb/dwc3/dwc3-generic-plat.c index d96b20570002..f8ad79c08c4e 100644 --- a/drivers/usb/dwc3/dwc3-generic-plat.c +++ b/drivers/usb/dwc3/dwc3-generic-plat.c @@ -85,11 +85,8 @@ static int dwc3_generic_probe(struct platform_device *pdev) static void dwc3_generic_remove(struct platform_device *pdev) { struct dwc3 *dwc = platform_get_drvdata(pdev); - struct dwc3_generic *dwc3g = to_dwc3_generic(dwc); dwc3_core_remove(dwc); - - clk_bulk_disable_unprepare(dwc3g->num_clocks, dwc3g->clks); } static int dwc3_generic_suspend(struct device *dev) From bd8c3ce6d7a205b3ba3ef9815db4c6932290ec59 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 30 Sep 2025 19:17:21 +0200 Subject: [PATCH 014/263] dt-bindings: usb: switch: split out ports definition The ports definition currently defined in the usb-switch.yaml fits standards devices which are either recipient of altmode muxing and orientation switching events or an element of the USB Super Speed data lanes. This doesn't necessarely fit combo PHYs like the Qualcomm USB3/DP Combo which has a different ports representation. Move the ports definition to a separate usb-switch-ports.yaml and reference it next to the usb-switch.yaml, except for the Qualcomm USB3/DP Combo PHY bindings. Reported-by: Rob Herring Closes: https://lore.kernel.org/all/175462129176.394940.16810637795278334342.robh@kernel.org/ Fixes: 3bad7fe22796 ("dt-bindings: phy: qcom,sc8280xp-qmp-usb43dp: Reference usb-switch.yaml to allow mode-switch") Signed-off-by: Neil Armstrong Reviewed-by: Rob Herring (Arm) Signed-off-by: Greg Kroah-Hartman --- .../bindings/phy/fsl,imx8mq-usb-phy.yaml | 4 +- .../bindings/phy/samsung,usb3-drd-phy.yaml | 4 +- .../devicetree/bindings/usb/fcs,fsa4480.yaml | 1 + .../devicetree/bindings/usb/gpio-sbu-mux.yaml | 1 + .../devicetree/bindings/usb/nxp,ptn36502.yaml | 1 + .../bindings/usb/onnn,nb7vpq904m.yaml | 1 + .../bindings/usb/parade,ps8830.yaml | 1 + .../bindings/usb/qcom,wcd939x-usbss.yaml | 1 + .../devicetree/bindings/usb/ti,tusb1046.yaml | 1 + .../bindings/usb/usb-switch-ports.yaml | 68 +++++++++++++++++++ .../devicetree/bindings/usb/usb-switch.yaml | 52 -------------- 11 files changed, 81 insertions(+), 54 deletions(-) create mode 100644 Documentation/devicetree/bindings/usb/usb-switch-ports.yaml diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml index 6a47e08e0e97..f9cffbb2df07 100644 --- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml +++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml @@ -142,7 +142,9 @@ allOf: required: - orientation-switch then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# unevaluatedProperties: false diff --git a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml index e906403208c0..ea1135c91fb7 100644 --- a/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml +++ b/Documentation/devicetree/bindings/phy/samsung,usb3-drd-phy.yaml @@ -125,7 +125,9 @@ allOf: contains: const: google,gs101-usb31drd-phy then: - $ref: /schemas/usb/usb-switch.yaml# + allOf: + - $ref: /schemas/usb/usb-switch.yaml# + - $ref: /schemas/usb/usb-switch-ports.yaml# properties: clocks: diff --git a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml index e3a7df91f7f1..89b1fb90aeeb 100644 --- a/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml +++ b/Documentation/devicetree/bindings/usb/fcs,fsa4480.yaml @@ -76,6 +76,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml index e588514fab2d..793662f6f3bf 100644 --- a/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml +++ b/Documentation/devicetree/bindings/usb/gpio-sbu-mux.yaml @@ -52,6 +52,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# - if: required: - mode-switch diff --git a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml index d805dde80796..4d2fcaa71870 100644 --- a/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml +++ b/Documentation/devicetree/bindings/usb/nxp,ptn36502.yaml @@ -46,6 +46,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml index 589914d22bf2..25fab5fdc2cd 100644 --- a/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml +++ b/Documentation/devicetree/bindings/usb/onnn,nb7vpq904m.yaml @@ -91,6 +91,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml index aeb33667818e..eaeab1c01a59 100644 --- a/Documentation/devicetree/bindings/usb/parade,ps8830.yaml +++ b/Documentation/devicetree/bindings/usb/parade,ps8830.yaml @@ -81,6 +81,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml index 96346723f3e9..96dcec9b7620 100644 --- a/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,wcd939x-usbss.yaml @@ -60,6 +60,7 @@ required: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# additionalProperties: false diff --git a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml index f713cac4a8ac..e1501ea6b50b 100644 --- a/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml +++ b/Documentation/devicetree/bindings/usb/ti,tusb1046.yaml @@ -11,6 +11,7 @@ maintainers: allOf: - $ref: usb-switch.yaml# + - $ref: usb-switch-ports.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml new file mode 100644 index 000000000000..6bf0c97e30ae --- /dev/null +++ b/Documentation/devicetree/bindings/usb/usb-switch-ports.yaml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/usb/usb-switch-ports.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: USB Orientation and Mode Switches Ports Graph Properties + +maintainers: + - Greg Kroah-Hartman + +description: + Ports Graph properties for devices handling USB mode and orientation switching. + +properties: + port: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + A port node to link the device to a TypeC controller for the purpose of + handling altmode muxing and orientation switching. + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + Super Speed (SS) Output endpoint to the Type-C connector + + port@1: + $ref: /schemas/graph.yaml#/$defs/port-base + description: + Super Speed (SS) Input endpoint from the Super-Speed PHY + unevaluatedProperties: false + + properties: + endpoint: + $ref: /schemas/graph.yaml#/$defs/endpoint-base + unevaluatedProperties: false + properties: + data-lanes: + $ref: /schemas/types.yaml#/definitions/uint32-array + minItems: 1 + maxItems: 8 + uniqueItems: true + items: + maximum: 8 + +oneOf: + - required: + - port + - required: + - ports + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/usb/usb-switch.yaml b/Documentation/devicetree/bindings/usb/usb-switch.yaml index 896201912630..f77731493dc4 100644 --- a/Documentation/devicetree/bindings/usb/usb-switch.yaml +++ b/Documentation/devicetree/bindings/usb/usb-switch.yaml @@ -25,56 +25,4 @@ properties: description: Possible handler of SuperSpeed signals retiming type: boolean - port: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - A port node to link the device to a TypeC controller for the purpose of - handling altmode muxing and orientation switching. - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - - ports: - $ref: /schemas/graph.yaml#/properties/ports - properties: - port@0: - $ref: /schemas/graph.yaml#/properties/port - description: - Super Speed (SS) Output endpoint to the Type-C connector - - port@1: - $ref: /schemas/graph.yaml#/$defs/port-base - description: - Super Speed (SS) Input endpoint from the Super-Speed PHY - unevaluatedProperties: false - - properties: - endpoint: - $ref: /schemas/graph.yaml#/$defs/endpoint-base - unevaluatedProperties: false - properties: - data-lanes: - $ref: /schemas/types.yaml#/definitions/uint32-array - minItems: 1 - maxItems: 8 - uniqueItems: true - items: - maximum: 8 - -oneOf: - - required: - - port - - required: - - ports - additionalProperties: true From dddc0f71485f1f29f236e387632181bcc09019a0 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 6 Oct 2025 22:39:52 +0100 Subject: [PATCH 015/263] usb: misc: Add x86 dependency for Intel USBIO driver The Intel USBIO driver is x86 only, other architectures have ACPI so add an appropriate depenecy plus compile test. Fixes: 121a0f839dbb3 ("usb: misc: Add Intel USBIO bridge driver") Signed-off-by: Peter Robinson Reviewed-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig index 09ac6f1c985f..0b56b773dbdf 100644 --- a/drivers/usb/misc/Kconfig +++ b/drivers/usb/misc/Kconfig @@ -182,6 +182,7 @@ config USB_LJCA config USB_USBIO tristate "Intel USBIO Bridge support" depends on USB && ACPI + depends on X86 || COMPILE_TEST select AUXILIARY_BUS help This adds support for Intel USBIO drivers. From 51cb04abd39097209b871e95ffa7e8584ce7dcba Mon Sep 17 00:00:00 2001 From: Krishna Kurapati Date: Mon, 13 Oct 2025 09:29:20 +0530 Subject: [PATCH 016/263] dt-bindings: usb: qcom,snps-dwc3: Fix bindings for X1E80100 Add the missing multiport controller binding to target list. Fix minItems for interrupt-names to avoid the following error on High Speed controller: usb@a200000: interrupt-names: ['dwc_usb3', 'pwr_event', 'dp_hs_phy_irq', 'dm_hs_phy_irq'] is too short Fixes: 6e762f7b8edc ("dt-bindings: usb: Introduce qcom,snps-dwc3") Cc: stable@vger.kernel.org Signed-off-by: Krishna Kurapati Reviewed-by: Krzysztof Kozlowski Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml index dfd084ed9024..d49a58d5478f 100644 --- a/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml +++ b/Documentation/devicetree/bindings/usb/qcom,snps-dwc3.yaml @@ -68,6 +68,7 @@ properties: - qcom,sm8550-dwc3 - qcom,sm8650-dwc3 - qcom,x1e80100-dwc3 + - qcom,x1e80100-dwc3-mp - const: qcom,snps-dwc3 reg: @@ -460,8 +461,10 @@ allOf: then: properties: interrupts: + minItems: 4 maxItems: 5 interrupt-names: + minItems: 4 items: - const: dwc_usb3 - const: pwr_event From d3c4c1f29aadccf2f43530bfa1e60a6d8030fd4a Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 11:18:18 +0200 Subject: [PATCH 017/263] staging: gpib: Fix no EOI on 1 and 2 byte writes EOI (End Or Identify) is a hardware line on the GPIB bus that can be asserted with the last byte of a message to indicate the end of the transfer to the receiving device. In this driver, a write with send_eoi true is done in 3 parts: Send first byte directly Send remaining but 1 bytes using the fifo Send the last byte directly with EOI asserted The first byte in a write is always sent by writing to the tms9914 chip directly to setup for the subsequent fifo transfer. We were not checking for a 1 byte write with send_eoi true resulting in EOI not being asserted. Since the fifo transfer was not executed (fifotransfersize == 0) the retval in the test after the fifo transfer code was still 1 from the preceding direct write. This caused it to return without executing the final direct write which would have sent an unsollicited extra byte. For a 2 byte message the first byte was sent directly. But since the fifo transfer was not executed (fifotransfersize == 1) and the retval in the test after the fifo transfer code was still 1 from the preceding first byte write it returned before the final direct byte write with send_eoi true. The second byte was then sent as a separate 1 byte write to complete the 2 byte write count again without EOI being asserted as above. Only send the first byte directly if more than 1 byte is to be transferred with send_eoi true. Also check for retval < 0 for the error return in case the fifo code is not used (1 or 2 byte message with send_eoi true). Fixes: 09a4655ee1eb ("staging: gpib: Add HP/Agilent/Keysight 8235xx PCI GPIB driver") Cc: stable Tested-by: Dave Penkler Signed-off-by: Dave Penkler Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/agilent_82350b/agilent_82350b.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c index 94bbb3b6576d..01a5bb43cd2d 100644 --- a/drivers/staging/gpib/agilent_82350b/agilent_82350b.c +++ b/drivers/staging/gpib/agilent_82350b/agilent_82350b.c @@ -182,10 +182,12 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, return retval; #endif - retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); - *bytes_written += num_bytes; - if (retval < 0) - return retval; + if (fifotransferlength > 0) { + retval = agilent_82350b_write(board, buffer, 1, 0, &num_bytes); + *bytes_written += num_bytes; + if (retval < 0) + return retval; + } write_byte(tms_priv, tms_priv->imr0_bits & ~HR_BOIE, IMR0); for (i = 1; i < fifotransferlength;) { @@ -217,7 +219,7 @@ static int agilent_82350b_accel_write(struct gpib_board *board, u8 *buffer, break; } write_byte(tms_priv, tms_priv->imr0_bits, IMR0); - if (retval) + if (retval < 0) return retval; if (send_eoi) { From 92a2b74a6b5a5d9b076cd9aa75e63c6461cbd073 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 13:33:58 +0200 Subject: [PATCH 018/263] staging: gpib: Fix sending clear and trigger events This driver was not sending device clear or trigger events when the board entered the DCAS or DTAS state respectively in device mode. DCAS is the Device Clear Active State which is entered on receiving a selective device clear message (SDC) or universal device clear message (DCL) from the controller in charge. DTAS is the Device Trigger Active State which is entered on receiving a group execute trigger (GET) message from the controller. In order for an application, implementing a particular device, to detect when one of these states is entered the driver needs to send the appropriate event. Send the appropriate gpib_event when DCAS or DTAS is set in the reported status word. This sets the DCAS or DTAS bits in the board's status word which can be monitored by the application. Fixes: 4e127de14fa7 ("staging: gpib: Add National Instruments USB GPIB driver") Cc: stable Tested-by: Dave Penkler Signed-off-by: Dave Penkler Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index 4dec87d12687..ea44a766fda2 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -327,7 +327,10 @@ static void ni_usb_soft_update_status(struct gpib_board *board, unsigned int ni_ board->status &= ~clear_mask; board->status &= ~ni_usb_ibsta_mask; board->status |= ni_usb_ibsta & ni_usb_ibsta_mask; - // FIXME should generate events on DTAS and DCAS + if (ni_usb_ibsta & DCAS) + push_gpib_event(board, EVENT_DEV_CLR); + if (ni_usb_ibsta & DTAS) + push_gpib_event(board, EVENT_DEV_TRG); spin_lock_irqsave(&board->spinlock, flags); /* remove set status bits from monitored set why ?***/ From aaf2af1ed147ef49be65afb541a67255e9f60d15 Mon Sep 17 00:00:00 2001 From: Dave Penkler Date: Sun, 28 Sep 2025 13:33:59 +0200 Subject: [PATCH 019/263] staging: gpib: Return -EINTR on device clear When the ATN (Attention) line is asserted during a read we get a NIUSB_ATN_STATE_ERROR during a read. For the controller to send a device clear it asserts ATN. Normally this is an error but in the case of a device clear it should be regarded as an interrupt. Return -EINTR when the Device Clear Active State (DCAS) is entered else signal an error with dev_dbg with status instead of just dev_err. Signed-off-by: Dave Penkler Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/ni_usb/ni_usb_gpib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c index ea44a766fda2..1f8412de9fa3 100644 --- a/drivers/staging/gpib/ni_usb/ni_usb_gpib.c +++ b/drivers/staging/gpib/ni_usb/ni_usb_gpib.c @@ -697,8 +697,12 @@ static int ni_usb_read(struct gpib_board *board, u8 *buffer, size_t length, */ break; case NIUSB_ATN_STATE_ERROR: - retval = -EIO; - dev_err(&usb_dev->dev, "read when ATN set\n"); + if (status.ibsta & DCAS) { + retval = -EINTR; + } else { + retval = -EIO; + dev_dbg(&usb_dev->dev, "read when ATN set stat: 0x%06x\n", status.ibsta); + } break; case NIUSB_ADDRESSING_ERROR: retval = -EIO; From b1aabb8ef09b4cf0cc0c92ca9dfd19482f3192c1 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 23 Sep 2025 09:36:03 +0800 Subject: [PATCH 020/263] staging: gpib: Fix device reference leak in fmh_gpib driver The fmh_gpib driver contains a device reference count leak in fmh_gpib_attach_impl() where driver_find_device() increases the reference count of the device by get_device() when matching but this reference is not properly decreased. Add put_device() in fmh_gpib_detach(), which ensures that the reference count of the device is correctly managed. Found by code review. Cc: stable Fixes: 8e4841a0888c ("staging: gpib: Add Frank Mori Hess FPGA PCI GPIB driver") Signed-off-by: Ma Ke Reviewed-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/gpib/fmh_gpib/fmh_gpib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c index 164dcfc3c9ef..f7bfb4a8e553 100644 --- a/drivers/staging/gpib/fmh_gpib/fmh_gpib.c +++ b/drivers/staging/gpib/fmh_gpib/fmh_gpib.c @@ -1517,6 +1517,11 @@ void fmh_gpib_detach(struct gpib_board *board) resource_size(e_priv->gpib_iomem_res)); } fmh_gpib_generic_detach(board); + + if (board->dev) { + put_device(board->dev); + board->dev = NULL; + } } static int fmh_gpib_pci_attach_impl(struct gpib_board *board, From 7e69a24b6b35d4ffd54dd702047a01f5858b3e45 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Wed, 24 Sep 2025 15:05:10 +0200 Subject: [PATCH 021/263] rust_binder: clean `clippy::mem_replace_with_default` warning Clippy reports: error: replacing a value of type `T` with `T::default()` is better expressed using `core::mem::take` --> drivers/android/binder/node.rs:690:32 | 690 | _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ help: consider using: `core::mem::take(&mut inner.freeze_list)` | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#mem_replace_with_default = note: `-D clippy::mem-replace-with-default` implied by `-D warnings` = help: to override `-D warnings` add `#[allow(clippy::mem_replace_with_default)]` The suggestion seems fine, thus apply it. Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/node.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder/node.rs b/drivers/android/binder/node.rs index ade895ef791e..08d362deaf61 100644 --- a/drivers/android/binder/node.rs +++ b/drivers/android/binder/node.rs @@ -687,7 +687,7 @@ pub(crate) fn remove_freeze_listener(&self, p: &Arc) { ); } if inner.freeze_list.is_empty() { - _unused_capacity = mem::replace(&mut inner.freeze_list, KVVec::new()); + _unused_capacity = mem::take(&mut inner.freeze_list); } } From c7c090af371775106360c9e7a7c35b718311c3f9 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 2 Oct 2025 09:25:29 +0000 Subject: [PATCH 022/263] rust_binder: remove warning about orphan mappings This condition occurs if a thread dies while processing a transaction. We should not print anything in this scenario. Signed-off-by: Alice Ryhl Reviewed-by: Joel Fernandes Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/process.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index f13a747e784c..d8c3c1ae740e 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -1346,10 +1346,6 @@ fn deferred_release(self: Arc) { .alloc .take_for_each(|offset, size, debug_id, odata| { let ptr = offset + address; - pr_warn!( - "{}: removing orphan mapping {offset}:{size}\n", - self.pid_in_current_ns() - ); let mut alloc = Allocation::new(self.clone(), debug_id, offset, size, ptr, false); if let Some(data) = odata { From bfe144da06b002cccf314769c45ecccb69501c48 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:51 +0000 Subject: [PATCH 023/263] rust_binder: freeze_notif_done should resend if wrong state Consider the following scenario: 1. A freeze notification is delivered to thread 1. 2. The process becomes frozen or unfrozen. 3. The message for step 2 is delivered to thread 2 and ignored because there is already a pending notification from step 1. 4. Thread 1 acknowledges the notification from step 1. In this case, step 4 should ensure that the message ignored in step 3 is resent as it can now be delivered. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index e68c3c8bc55a..74bebb8d4d9b 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -245,8 +245,9 @@ pub(crate) fn freeze_notif_done(self: &Arc, reader: &mut UserSliceReader) ); return Err(EINVAL); } - if freeze.is_clearing { - // Immediately send another FreezeMessage for BR_CLEAR_FREEZE_NOTIFICATION_DONE. + let is_frozen = freeze.node.owner.inner.lock().is_frozen; + if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { + // Immediately send another FreezeMessage. clear_msg = Some(FreezeMessage::init(alloc, cookie)); } freeze.is_pending = false; From 99559e5bb4c6795824b6531ad61519c1d9500079 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:52 +0000 Subject: [PATCH 024/263] rust_binder: don't delete FreezeListener if there are pending duplicates When userspace issues commands to a freeze listener, it identifies it using a cookie. Normally this cookie uniquely identifies a freeze listener, but when userspace clears a listener with the intent of deleting it, it's allowed to "regret" clearing it and create a new freeze listener for the same node using the same cookie. (IMO this was an API mistake, but userspace relies on it.) Currently if the active freeze listener gets fully deleted while there are still pending duplicates, then the code incorrectly deletes the pending duplicates too. To fix this, do not delete the entry if there are still pending duplicates. Since the current data structure requires a main freeze listener, we convert one pending duplicate into the primary listener in this scenario. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index 74bebb8d4d9b..e304aceca7f3 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -106,7 +106,16 @@ fn do_work( return Ok(true); } if freeze.is_clearing { - _removed_listener = freeze_entry.remove_node(); + kernel::warn_on!(freeze.num_cleared_duplicates != 0); + if freeze.num_pending_duplicates > 0 { + // The primary freeze listener was deleted, so convert a pending duplicate back + // into the primary one. + freeze.num_pending_duplicates -= 1; + freeze.is_pending = true; + freeze.is_clearing = true; + } else { + _removed_listener = freeze_entry.remove_node(); + } drop(node_refs); writer.write_code(BR_CLEAR_FREEZE_NOTIFICATION_DONE)?; writer.write_payload(&self.cookie.0)?; From b5ce7a5cc50f4c283d0bfa5cc24fe864cb9a3400 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 7 Oct 2025 09:39:53 +0000 Subject: [PATCH 025/263] rust_binder: report freeze notification only when fully frozen Binder only sends out freeze notifications when ioctl_freeze() completes and the process has become fully frozen. However, if a freeze notification is registered during the freeze operation, then it registers an initial state of 'frozen'. This is a problem because if the freeze operation fails, then the listener is not told about that state change, leading to lost updates. Signed-off-by: Alice Ryhl Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder/freeze.rs | 4 +-- drivers/android/binder/process.rs | 46 +++++++++++++++++++++------ drivers/android/binder/transaction.rs | 6 ++-- 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/drivers/android/binder/freeze.rs b/drivers/android/binder/freeze.rs index e304aceca7f3..220de35ae85a 100644 --- a/drivers/android/binder/freeze.rs +++ b/drivers/android/binder/freeze.rs @@ -121,7 +121,7 @@ fn do_work( writer.write_payload(&self.cookie.0)?; Ok(true) } else { - let is_frozen = freeze.node.owner.inner.lock().is_frozen; + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); if freeze.last_is_frozen == Some(is_frozen) { return Ok(true); } @@ -254,7 +254,7 @@ pub(crate) fn freeze_notif_done(self: &Arc, reader: &mut UserSliceReader) ); return Err(EINVAL); } - let is_frozen = freeze.node.owner.inner.lock().is_frozen; + let is_frozen = freeze.node.owner.inner.lock().is_frozen.is_fully_frozen(); if freeze.is_clearing || freeze.last_is_frozen != Some(is_frozen) { // Immediately send another FreezeMessage. clear_msg = Some(FreezeMessage::init(alloc, cookie)); diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs index d8c3c1ae740e..7607353a5e92 100644 --- a/drivers/android/binder/process.rs +++ b/drivers/android/binder/process.rs @@ -72,6 +72,33 @@ fn new(address: usize, size: usize) -> Self { const PROC_DEFER_FLUSH: u8 = 1; const PROC_DEFER_RELEASE: u8 = 2; +#[derive(Copy, Clone)] +pub(crate) enum IsFrozen { + Yes, + No, + InProgress, +} + +impl IsFrozen { + /// Whether incoming transactions should be rejected due to freeze. + pub(crate) fn is_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => true, + } + } + + /// Whether freeze notifications consider this process frozen. + pub(crate) fn is_fully_frozen(self) -> bool { + match self { + IsFrozen::Yes => true, + IsFrozen::No => false, + IsFrozen::InProgress => false, + } + } +} + /// The fields of `Process` protected by the spinlock. pub(crate) struct ProcessInner { is_manager: bool, @@ -98,7 +125,7 @@ pub(crate) struct ProcessInner { /// are woken up. outstanding_txns: u32, /// Process is frozen and unable to service binder transactions. - pub(crate) is_frozen: bool, + pub(crate) is_frozen: IsFrozen, /// Process received sync transactions since last frozen. pub(crate) sync_recv: bool, /// Process received async transactions since last frozen. @@ -124,7 +151,7 @@ fn new() -> Self { started_thread_count: 0, defer_work: 0, outstanding_txns: 0, - is_frozen: false, + is_frozen: IsFrozen::No, sync_recv: false, async_recv: false, binderfs_file: None, @@ -1260,7 +1287,7 @@ fn deferred_release(self: Arc) { let is_manager = { let mut inner = self.inner.lock(); inner.is_dead = true; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; inner.sync_recv = false; inner.async_recv = false; inner.is_manager @@ -1367,7 +1394,7 @@ pub(crate) fn drop_outstanding_txn(&self) { return; } inner.outstanding_txns -= 1; - inner.is_frozen && inner.outstanding_txns == 0 + inner.is_frozen.is_frozen() && inner.outstanding_txns == 0 }; if wake { @@ -1381,7 +1408,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; drop(inner); msgs.send_messages(); return Ok(()); @@ -1390,7 +1417,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { let mut inner = self.inner.lock(); inner.sync_recv = false; inner.async_recv = false; - inner.is_frozen = true; + inner.is_frozen = IsFrozen::InProgress; if info.timeout_ms > 0 { let mut jiffies = kernel::time::msecs_to_jiffies(info.timeout_ms); @@ -1404,7 +1431,7 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { .wait_interruptible_timeout(&mut inner, jiffies) { CondVarTimeoutResult::Signal { .. } => { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; return Err(ERESTARTSYS); } CondVarTimeoutResult::Woken { jiffies: remaining } => { @@ -1418,17 +1445,18 @@ pub(crate) fn ioctl_freeze(&self, info: &BinderFreezeInfo) -> Result { } if inner.txns_pending_locked() { - inner.is_frozen = false; + inner.is_frozen = IsFrozen::No; Err(EAGAIN) } else { drop(inner); match self.prepare_freeze_messages() { Ok(batch) => { + self.inner.lock().is_frozen = IsFrozen::Yes; batch.send_messages(); Ok(()) } Err(kernel::alloc::AllocError) => { - self.inner.lock().is_frozen = false; + self.inner.lock().is_frozen = IsFrozen::No; Err(ENOMEM) } } diff --git a/drivers/android/binder/transaction.rs b/drivers/android/binder/transaction.rs index 02512175d622..4bd3c0e417eb 100644 --- a/drivers/android/binder/transaction.rs +++ b/drivers/android/binder/transaction.rs @@ -249,7 +249,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { if oneway { if let Some(target_node) = self.target_node.clone() { - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.async_recv = true; if self.flags & TF_UPDATE_TXN != 0 { if let Some(t_outdated) = @@ -270,7 +270,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { return Err(BinderError::new_frozen_oneway()); } else { return Ok(()); @@ -280,7 +280,7 @@ pub(crate) fn submit(self: DLArc) -> BinderResult { } } - if process_inner.is_frozen { + if process_inner.is_frozen.is_frozen() { process_inner.sync_recv = true; return Err(BinderError::new_frozen()); } From 7557f189942571821a09879edfcdfdafefe4d67f Mon Sep 17 00:00:00 2001 From: Kriish Sharma Date: Fri, 3 Oct 2025 18:08:49 +0000 Subject: [PATCH 026/263] binder: Fix missing kernel-doc entries in binder.c Fix several kernel-doc warnings in `drivers/android/binder.c` caused by undocumented struct members and function parameters. In particular, add missing documentation for the `@thread` parameter in binder_free_buf_locked(). Signed-off-by: Kriish Sharma Acked-by: Carlos Llamas Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8c99ceaa303b..3a09c54bc37b 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2418,10 +2418,10 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, /** * struct binder_ptr_fixup - data to be fixed-up in target buffer - * @offset offset in target buffer to fixup - * @skip_size bytes to skip in copy (fixup will be written later) - * @fixup_data data to write at fixup offset - * @node list node + * @offset: offset in target buffer to fixup + * @skip_size: bytes to skip in copy (fixup will be written later) + * @fixup_data: data to write at fixup offset + * @node: list node * * This is used for the pointer fixup list (pf) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -2438,10 +2438,10 @@ struct binder_ptr_fixup { /** * struct binder_sg_copy - scatter-gather data to be copied - * @offset offset in target buffer - * @sender_uaddr user address in source buffer - * @length bytes to copy - * @node list node + * @offset: offset in target buffer + * @sender_uaddr: user address in source buffer + * @length: bytes to copy + * @node: list node * * This is used for the sg copy list (sgc) which is created and consumed * during binder_transaction() and is only accessed locally. No @@ -4063,14 +4063,15 @@ binder_freeze_notification_done(struct binder_proc *proc, /** * binder_free_buf() - free the specified buffer - * @proc: binder proc that owns buffer - * @buffer: buffer to be freed - * @is_failure: failed to send transaction + * @proc: binder proc that owns buffer + * @thread: binder thread performing the buffer release + * @buffer: buffer to be freed + * @is_failure: failed to send transaction * - * If buffer for an async transaction, enqueue the next async + * If the buffer is for an async transaction, enqueue the next async * transaction from the node. * - * Cleanup buffer and free it. + * Cleanup the buffer and free it. */ static void binder_free_buf(struct binder_proc *proc, From 11fb1a82aefa6f7fea6ac82334edb5639b9927df Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 23 Sep 2025 16:09:27 +0100 Subject: [PATCH 027/263] firmware: arm_ffa: Add support for IMPDEF value in the memory access descriptor FF-A v1.2 introduced 16 byte IMPLEMENTATION DEFINED value in the endpoint memory access descriptor to allow any sender could to specify an its any custom value for each receiver. Also this value must be specified by the receiver when retrieving the memory region. The sender must ensure it informs the receiver of this value via an IMPLEMENTATION DEFINED mechanism such as a partition message. So the FF-A driver can use the message interfaces to communicate the value and set the same in the ffa_mem_region_attributes structures when using the memory interfaces. The driver ensure that the size of the endpoint memory access descriptors is set correctly based on the FF-A version. Fixes: 9fac08d9d985 ("firmware: arm_ffa: Upgrade FF-A version to v1.2 in the driver") Reported-by: Lixiang Mao Tested-by: Lixiang Mao Message-Id: <20250923150927.1218364-1-sudeep.holla@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_ffa/driver.c | 37 ++++++++++++++++++++++--------- include/linux/arm_ffa.h | 21 ++++++++++++++++-- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c index 65bf1685350a..c72ee4756585 100644 --- a/drivers/firmware/arm_ffa/driver.c +++ b/drivers/firmware/arm_ffa/driver.c @@ -649,6 +649,26 @@ static u16 ffa_memory_attributes_get(u32 func_id) return FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | FFA_MEM_INNER_SHAREABLE; } +static void ffa_emad_impdef_value_init(u32 version, void *dst, void *src) +{ + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(version)) + memcpy(dst, src, sizeof(ep_mem_access->impdef_val)); +} + +static void +ffa_mem_region_additional_setup(u32 version, struct ffa_mem_region *mem_region) +{ + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version)) { + mem_region->ep_mem_size = 0; + } else { + mem_region->ep_mem_size = ffa_emad_size_get(version); + mem_region->ep_mem_offset = sizeof(*mem_region); + memset(mem_region->reserved, 0, 12); + } +} + static int ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, struct ffa_mem_ops_args *args) @@ -667,27 +687,24 @@ ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, mem_region->flags = args->flags; mem_region->sender_id = drv_info->vm_id; mem_region->attributes = ffa_memory_attributes_get(func_id); - ep_mem_access = buffer + - ffa_mem_desc_offset(buffer, 0, drv_info->version); composite_offset = ffa_mem_desc_offset(buffer, args->nattrs, drv_info->version); - for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { + for (idx = 0; idx < args->nattrs; idx++) { + ep_mem_access = buffer + + ffa_mem_desc_offset(buffer, idx, drv_info->version); ep_mem_access->receiver = args->attrs[idx].receiver; ep_mem_access->attrs = args->attrs[idx].attrs; ep_mem_access->composite_off = composite_offset; ep_mem_access->flag = 0; ep_mem_access->reserved = 0; + ffa_emad_impdef_value_init(drv_info->version, + ep_mem_access->impdef_val, + args->attrs[idx].impdef_val); } mem_region->handle = 0; mem_region->ep_count = args->nattrs; - if (drv_info->version <= FFA_VERSION_1_0) { - mem_region->ep_mem_size = 0; - } else { - mem_region->ep_mem_size = sizeof(*ep_mem_access); - mem_region->ep_mem_offset = sizeof(*mem_region); - memset(mem_region->reserved, 0, 12); - } + ffa_mem_region_additional_setup(drv_info->version, mem_region); composite = buffer + composite_offset; composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index cd7ee4df9045..81e603839c4a 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -338,6 +338,7 @@ struct ffa_mem_region_attributes { * an `struct ffa_mem_region_addr_range`. */ u32 composite_off; + u8 impdef_val[16]; u64 reserved; }; @@ -417,15 +418,31 @@ struct ffa_mem_region { #define CONSTITUENTS_OFFSET(x) \ (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define FFA_EMAD_HAS_IMPDEF_FIELD(version) ((version) >= FFA_VERSION_1_2) +#define FFA_MEM_REGION_HAS_EP_MEM_OFFSET(version) ((version) > FFA_VERSION_1_0) + +static inline u32 ffa_emad_size_get(u32 ffa_version) +{ + u32 sz; + struct ffa_mem_region_attributes *ep_mem_access; + + if (FFA_EMAD_HAS_IMPDEF_FIELD(ffa_version)) + sz = sizeof(*ep_mem_access); + else + sz = sizeof(*ep_mem_access) - sizeof(ep_mem_access->impdef_val); + + return sz; +} + static inline u32 ffa_mem_desc_offset(struct ffa_mem_region *buf, int count, u32 ffa_version) { - u32 offset = count * sizeof(struct ffa_mem_region_attributes); + u32 offset = count * ffa_emad_size_get(ffa_version); /* * Earlier to v1.1, the endpoint memory descriptor array started at * offset 32(i.e. offset of ep_mem_offset in the current structure) */ - if (ffa_version <= FFA_VERSION_1_0) + if (!FFA_MEM_REGION_HAS_EP_MEM_OFFSET(ffa_version)) offset += offsetof(struct ffa_mem_region, ep_mem_offset); else offset += sizeof(struct ffa_mem_region); From a89103f67112453fa36c9513e951c19eed9d2d92 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Sep 2025 16:47:13 +0800 Subject: [PATCH 028/263] spi: spi-nxp-fspi: re-config the clock rate when operation require new clock rate Current operation contain the max_freq, so new coming operation may use new clock rate, need to re-config the clock rate to match the requirement. Fixes: 26851cf65ffc ("spi: nxp-fspi: Support per spi-mem operation frequency switches") Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-1-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index f9371f98a65b..4e82f9e900ac 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -404,6 +404,8 @@ struct nxp_fspi { #define FSPI_NEED_INIT BIT(0) #define FSPI_DTR_MODE BIT(1) int flags; + /* save the previous operation clock rate */ + unsigned long pre_op_rate; }; static inline int needs_ip_only(struct nxp_fspi *f) @@ -780,11 +782,17 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, uint64_t size_kb; /* - * Return, if previously selected target device is same as current - * requested target device. Also the DTR or STR mode do not change. + * Return when following condition all meet, + * 1, if previously selected target device is same as current + * requested target device. + * 2, the DTR or STR mode do not change. + * 3, previous operation max rate equals current one. + * + * For other case, need to re-config. */ if ((f->selected == spi_get_chipselect(spi, 0)) && - (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr)) + (!!(f->flags & FSPI_DTR_MODE) == op_is_dtr) && + (f->pre_op_rate == op->max_freq)) return; /* Reset FLSHxxCR0 registers */ @@ -832,6 +840,8 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, else nxp_fspi_dll_override(f); + f->pre_op_rate = op->max_freq; + f->selected = spi_get_chipselect(spi, 0); } From b93b4269791fdebbac2a9ad26f324dc2abb9e60f Mon Sep 17 00:00:00 2001 From: Han Xu Date: Mon, 22 Sep 2025 16:47:14 +0800 Subject: [PATCH 029/263] spi: spi-nxp-fspi: add extra delay after dll locked Due to the erratum ERR050272, the DLL lock status register STS2 [xREFLOCK, xSLVLOCK] bit may indicate DLL is locked before DLL is actually locked. Add an extra 4us delay as a workaround. refer to ERR050272, on Page 20. https://www.nxp.com/docs/en/errata/IMX8_1N94W.pdf Fixes: 99d822b3adc4 ("spi: spi-nxp-fspi: use DLL calibration when clock rate > 100MHz") Signed-off-by: Han Xu Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-2-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 4e82f9e900ac..96b3654b45ab 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -721,6 +721,12 @@ static void nxp_fspi_dll_calibration(struct nxp_fspi *f) 0, POLL_TOUT, true); if (ret) dev_warn(f->dev, "DLL lock failed, please fix it!\n"); + + /* + * For ERR050272, DLL lock status bit is not accurate, + * wait for 4us more as a workaround. + */ + udelay(4); } /* From f43579ef3500527649b1c233be7cf633806353aa Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 22 Sep 2025 16:47:15 +0800 Subject: [PATCH 030/263] spi: spi-nxp-fspi: limit the clock rate for different sample clock source selection For different sample clock source selection, the max frequency flexspi supported are different. For mode 0, max frequency is 66MHz. For mode 3, the max frequency is 166MHz. Refer to 3.9.9 FlexSPI timing parameters on page 65. https://www.nxp.com/docs/en/data-sheet/IMX8MNCEC.pdf Though flexspi maybe still work under higher frequency, but can't guarantee the stability. IC suggest to add this limitation on all SoCs which contain flexspi. Fixes: c07f27032317 ("spi: spi-nxp-fspi: add the support for sample data from DQS pad") Signed-off-by: Haibo Chen Link: https://patch.msgid.link/20250922-fspi-fix-v1-3-ff4315359d31@nxp.com Signed-off-by: Mark Brown --- drivers/spi/spi-nxp-fspi.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index 96b3654b45ab..b6c79e50d842 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -406,6 +406,8 @@ struct nxp_fspi { int flags; /* save the previous operation clock rate */ unsigned long pre_op_rate; + /* the max clock rate fspi output to device */ + unsigned long max_rate; }; static inline int needs_ip_only(struct nxp_fspi *f) @@ -687,10 +689,13 @@ static void nxp_fspi_select_rx_sample_clk_source(struct nxp_fspi *f, * change the mode back to mode 0. */ reg = fspi_readl(f, f->iobase + FSPI_MCR0); - if (op_is_dtr) + if (op_is_dtr) { reg |= FSPI_MCR0_RXCLKSRC(3); - else /*select mode 0 */ + f->max_rate = 166000000; + } else { /*select mode 0 */ reg &= ~FSPI_MCR0_RXCLKSRC(3); + f->max_rate = 66000000; + } fspi_writel(f, reg, f->iobase + FSPI_MCR0); } @@ -816,6 +821,7 @@ static void nxp_fspi_select_mem(struct nxp_fspi *f, struct spi_device *spi, dev_dbg(f->dev, "Target device [CS:%x] selected\n", spi_get_chipselect(spi, 0)); nxp_fspi_select_rx_sample_clk_source(f, op_is_dtr); + rate = min(f->max_rate, op->max_freq); if (op_is_dtr) { f->flags |= FSPI_DTR_MODE; From 8735696acea24ac1f9d4490992418c71941ca68c Mon Sep 17 00:00:00 2001 From: Mattijs Korpershoek Date: Thu, 9 Oct 2025 09:10:38 +0200 Subject: [PATCH 031/263] spi: cadence-quadspi: Fix pm_runtime unbalance on dma EPROBE_DEFER In csqspi_probe(), when cqspi_request_mmap_dma() returns -EPROBE_DEFER, we handle the error by jumping to probe_setup_failed. In that label, we call pm_runtime_disable(), even if we never called pm_runtime_enable() before. Because of this, the driver cannot probe: [ 2.690018] cadence-qspi 47040000.spi: No Rx DMA available [ 2.699735] spi-nor spi0.0: resume failed with -13 [ 2.699741] spi-nor: probe of spi0.0 failed with error -13 Only call pm_runtime_disable() if it was enabled by adding a new label to handle cqspi_request_mmap_dma() failures. Fixes: b07f349d1864 ("spi: spi-cadence-quadspi: Fix pm runtime unbalance") Signed-off-by: Mattijs Korpershoek Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20251009-cadence-quadspi-fix-pm-runtime-v2-1-8bdfefc43902@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-cadence-quadspi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 8fb13df8ff87..81017402bc56 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -1995,7 +1995,7 @@ static int cqspi_probe(struct platform_device *pdev) if (cqspi->use_direct_mode) { ret = cqspi_request_mmap_dma(cqspi); if (ret == -EPROBE_DEFER) - goto probe_setup_failed; + goto probe_dma_failed; } if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) { @@ -2019,9 +2019,10 @@ static int cqspi_probe(struct platform_device *pdev) return 0; probe_setup_failed: - cqspi_controller_enable(cqspi, 0); if (!(ddata && (ddata->quirks & CQSPI_DISABLE_RUNTIME_PM))) pm_runtime_disable(dev); +probe_dma_failed: + cqspi_controller_enable(cqspi, 0); probe_reset_failed: if (cqspi->is_jh7110) cqspi_jh7110_disable_clk(pdev, cqspi); From aa960b597600bed80fe171729057dd6aa188b5b5 Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Wed, 24 Sep 2025 09:56:05 +0100 Subject: [PATCH 032/263] arm64: dts: broadcom: bcm2712: Define VGIC interrupt Define the interrupt in the GICv2 for vGIC so KVM can be used, it was missed from the original upstream DTB for some reason. Signed-off-by: Peter Robinson Cc: Andrea della Porta Cc: Phil Elwell Fixes: faa3381267d0 ("arm64: dts: broadcom: Add minimal support for Raspberry Pi 5") Link: https://lore.kernel.org/r/20250924085612.1039247-1-pbrobinson@gmail.com Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/bcm2712.dtsi | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi index e77a66adc22a..205b87f557d6 100644 --- a/arch/arm64/boot/dts/broadcom/bcm2712.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm2712.dtsi @@ -326,6 +326,8 @@ gicv2: interrupt-controller@7fff9000 { <0x7fffe000 0x2000>; interrupt-controller; #address-cells = <0>; + interrupts = ; #interrupt-cells = <3>; }; From 4adc20ba95d472a919f54d441663924e33c92279 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 5 Oct 2025 13:38:16 +0200 Subject: [PATCH 033/263] ARM: dts: broadcom: rpi: Switch to V3D firmware clock Until commit 919d6924ae9b ("clk: bcm: rpi: Turn firmware clock on/off when preparing/unpreparing") the clk-raspberrypi driver wasn't able to change the state of the V3D clock. Only the clk-bcm2835 was able to do this before. After this commit both drivers were able to work against each other, which could result in a system freeze. One step to avoid this conflict is to switch all V3D consumer to the firmware clock. Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-arm-kernel/727aa0c8-2981-4662-adf3-69cac2da956d@samsung.com/ Fixes: 919d6924ae9b ("clk: bcm: rpi: Turn firmware clock on/off when preparing/unpreparing") Signed-off-by: Stefan Wahren Co-developed-by: Melissa Wen Signed-off-by: Melissa Wen Tested-by: Marek Szyprowski Link: https://lore.kernel.org/r/20251005113816.6721-1-wahrenst@gmx.net Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi | 8 ++++++++ arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi | 9 +++++++++ 2 files changed, 17 insertions(+) diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi index c78ed064d166..1eb6406449d1 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi.dtsi @@ -77,6 +77,14 @@ &i2c0 { /delete-property/ pinctrl-0; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &rmem { /* * RPi4's co-processor will copy the board's bootloader configuration diff --git a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi index 8b3c21d9f333..fa9d784c88b6 100644 --- a/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi +++ b/arch/arm/boot/dts/broadcom/bcm2835-rpi-common.dtsi @@ -13,7 +13,16 @@ &hdmi { clock-names = "pixel", "hdmi"; }; +&pm { + clocks = <&firmware_clocks 5>, + <&clocks BCM2835_CLOCK_PERI_IMAGE>, + <&clocks BCM2835_CLOCK_H264>, + <&clocks BCM2835_CLOCK_ISP>; + clock-names = "v3d", "peri_image", "h264", "isp"; +}; + &v3d { + clocks = <&firmware_clocks 5>; power-domains = <&power RPI_POWER_DOMAIN_V3D>; }; From 54e96258a6930909b690fd7e8889749231ba8085 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 6 Oct 2025 15:35:36 -1000 Subject: [PATCH 034/263] sched_ext: Mark scx_bpf_dsq_move_set_[slice|vtime]() with KF_RCU scx_bpf_dsq_move_set_slice() and scx_bpf_dsq_move_set_vtime() take a DSQ iterator argument which has to be valid. Mark them with KF_RCU. Fixes: 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()") Cc: stable@vger.kernel.org # v6.12+ Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 2b0e88206d07..fc353b8d69f7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -5688,8 +5688,8 @@ BTF_KFUNCS_START(scx_kfunc_ids_dispatch) BTF_ID_FLAGS(func, scx_bpf_dispatch_nr_slots) BTF_ID_FLAGS(func, scx_bpf_dispatch_cancel) BTF_ID_FLAGS(func, scx_bpf_dsq_move_to_local) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_dispatch) @@ -5820,8 +5820,8 @@ __bpf_kfunc_end_defs(); BTF_KFUNCS_START(scx_kfunc_ids_unlocked) BTF_ID_FLAGS(func, scx_bpf_create_dsq, KF_SLEEPABLE) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice) -BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_slice, KF_RCU) +BTF_ID_FLAGS(func, scx_bpf_dsq_move_set_vtime, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_dsq_move_vtime, KF_RCU) BTF_KFUNCS_END(scx_kfunc_ids_unlocked) From efeeaac9ae9763f9c953e69633c86bc3031e39b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Oct 2025 13:56:23 -1000 Subject: [PATCH 035/263] sched_ext: Sync error_irq_work before freeing scx_sched By the time scx_sched_free_rcu_work() runs, the scx_sched is no longer reachable. However, a previously queued error_irq_work may still be pending or running. Ensure it completes before proceeding with teardown. Fixes: bff3b5aec1b7 ("sched_ext: Move disable machinery into scx_sched") Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index fc353b8d69f7..a79dfd0f743a 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3471,7 +3471,9 @@ static void scx_sched_free_rcu_work(struct work_struct *work) struct scx_dispatch_q *dsq; int node; + irq_work_sync(&sch->error_irq_work); kthread_stop(sch->helper->task); + free_percpu(sch->pcpu); for_each_node_state(node, N_POSSIBLE) From a8ad873113d3fe01f9b5d737d4b0570fa36826b0 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Fri, 10 Oct 2025 12:12:50 -0700 Subject: [PATCH 036/263] sched_ext: defer queue_balance_callback() until after ops.dispatch The sched_ext code calls queue_balance_callback() during enqueue_task() to defer operations that drop multiple locks until we can unpin them. The call assumes that the rq lock is held until the callbacks are invoked, and the pending callbacks will not be visible to any other threads. This is enforced by a WARN_ON_ONCE() in rq_pin_lock(). However, balance_one() may actually drop the lock during a BPF dispatch call. Another thread may win the race to get the rq lock and see the pending callback. To avoid this, sched_ext must only queue the callback after the dispatch calls have completed. CPU 0 CPU 1 CPU 2 scx_balance() rq_unpin_lock() scx_balance_one() |= IN_BALANCE scx_enqueue() ops.dispatch() rq_unlock() rq_lock() queue_balance_callback() rq_unlock() [WARN] rq_pin_lock() rq_lock() &= ~IN_BALANCE rq_repin_lock() Changelog v2-> v1 (https://lore.kernel.org/sched-ext/aOgOxtHCeyRT_7jn@gpd4) - Fixed explanation in patch description (Andrea) - Fixed scx_rq mask state updates (Andrea) - Added Reviewed-by tag from Andrea Reported-by: Jakub Kicinski Signed-off-by: Emil Tsalapatis (Meta) Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 29 +++++++++++++++++++++++++++-- kernel/sched/sched.h | 1 + 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index a79dfd0f743a..1352e6a5b089 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -780,13 +780,23 @@ static void schedule_deferred(struct rq *rq) if (rq->scx.flags & SCX_RQ_IN_WAKEUP) return; + /* Don't do anything if there already is a deferred operation. */ + if (rq->scx.flags & SCX_RQ_BAL_PENDING) + return; + /* * If in balance, the balance callbacks will be called before rq lock is * released. Schedule one. + * + * + * We can't directly insert the callback into the + * rq's list: The call can drop its lock and make the pending balance + * callback visible to unrelated code paths that call rq_pin_lock(). + * + * Just let balance_one() know that it must do it itself. */ if (rq->scx.flags & SCX_RQ_IN_BALANCE) { - queue_balance_callback(rq, &rq->scx.deferred_bal_cb, - deferred_bal_cb_workfn); + rq->scx.flags |= SCX_RQ_BAL_CB_PENDING; return; } @@ -2003,6 +2013,19 @@ static void flush_dispatch_buf(struct scx_sched *sch, struct rq *rq) dspc->cursor = 0; } +static inline void maybe_queue_balance_callback(struct rq *rq) +{ + lockdep_assert_rq_held(rq); + + if (!(rq->scx.flags & SCX_RQ_BAL_CB_PENDING)) + return; + + queue_balance_callback(rq, &rq->scx.deferred_bal_cb, + deferred_bal_cb_workfn); + + rq->scx.flags &= ~SCX_RQ_BAL_CB_PENDING; +} + static int balance_one(struct rq *rq, struct task_struct *prev) { struct scx_sched *sch = scx_root; @@ -2150,6 +2173,8 @@ static int balance_scx(struct rq *rq, struct task_struct *prev, #endif rq_repin_lock(rq, rf); + maybe_queue_balance_callback(rq); + return ret; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5d07067f60..3f7fab3d7960 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -784,6 +784,7 @@ enum scx_rq_flags { SCX_RQ_BAL_KEEP = 1 << 3, /* balance decided to keep current */ SCX_RQ_BYPASSING = 1 << 4, SCX_RQ_CLK_VALID = 1 << 5, /* RQ clock is fresh and valid */ + SCX_RQ_BAL_CB_PENDING = 1 << 6, /* must queue a cb after dispatching */ SCX_RQ_IN_WAKEUP = 1 << 16, SCX_RQ_IN_BALANCE = 1 << 17, From 14c1da3895a116f4e32c20487046655f26d3999b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 8 Oct 2025 13:43:26 -1000 Subject: [PATCH 037/263] sched_ext: Allocate scx_kick_cpus_pnt_seqs lazily using kvzalloc() On systems with >4096 CPUs, scx_kick_cpus_pnt_seqs allocation fails during boot because it exceeds the 32,768 byte percpu allocator limit. Restructure to use DEFINE_PER_CPU() for the per-CPU pointers, with each CPU pointing to its own kvzalloc'd array. Move allocation from boot time to scx_enable() and free in scx_disable(), so the O(nr_cpu_ids^2) memory is only consumed when sched_ext is active. Use RCU to guard against racing with free. Arrays are freed via call_rcu() and kick_cpus_irq_workfn() uses rcu_dereference_bh() with a NULL check. While at it, rename to scx_kick_pseqs for brevity and update comments to clarify these are pick_task sequence numbers. v2: RCU protect scx_kick_seqs to manage kick_cpus_irq_workfn() racing against disable as per Andrea. v3: Fix bugs notcied by Andrea. Reported-by: Phil Auld Link: http://lkml.kernel.org/r/20251007133523.GA93086@pauld.westford.csb Cc: Andrea Righi Reviewed-by: Emil Tsalapatis Reviewed-by: Phil Auld Reviewed-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 89 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 10 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 1352e6a5b089..c645d47124e7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -67,8 +67,19 @@ static unsigned long scx_watchdog_timestamp = INITIAL_JIFFIES; static struct delayed_work scx_watchdog_work; -/* for %SCX_KICK_WAIT */ -static unsigned long __percpu *scx_kick_cpus_pnt_seqs; +/* + * For %SCX_KICK_WAIT: Each CPU has a pointer to an array of pick_task sequence + * numbers. The arrays are allocated with kvzalloc() as size can exceed percpu + * allocator limits on large machines. O(nr_cpu_ids^2) allocation, allocated + * lazily when enabling and freed when disabling to avoid waste when sched_ext + * isn't active. + */ +struct scx_kick_pseqs { + struct rcu_head rcu; + unsigned long seqs[]; +}; + +static DEFINE_PER_CPU(struct scx_kick_pseqs __rcu *, scx_kick_pseqs); /* * Direct dispatch marker. @@ -3877,6 +3888,27 @@ static const char *scx_exit_reason(enum scx_exit_kind kind) } } +static void free_kick_pseqs_rcu(struct rcu_head *rcu) +{ + struct scx_kick_pseqs *pseqs = container_of(rcu, struct scx_kick_pseqs, rcu); + + kvfree(pseqs); +} + +static void free_kick_pseqs(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *to_free; + + to_free = rcu_replace_pointer(*pseqs, NULL, true); + if (to_free) + call_rcu(&to_free->rcu, free_kick_pseqs_rcu); + } +} + static void scx_disable_workfn(struct kthread_work *work) { struct scx_sched *sch = container_of(work, struct scx_sched, disable_work); @@ -4013,6 +4045,7 @@ static void scx_disable_workfn(struct kthread_work *work) free_percpu(scx_dsp_ctx); scx_dsp_ctx = NULL; scx_dsp_max_batch = 0; + free_kick_pseqs(); mutex_unlock(&scx_enable_mutex); @@ -4375,6 +4408,33 @@ static void scx_vexit(struct scx_sched *sch, irq_work_queue(&sch->error_irq_work); } +static int alloc_kick_pseqs(void) +{ + int cpu; + + /* + * Allocate per-CPU arrays sized by nr_cpu_ids. Use kvzalloc as size + * can exceed percpu allocator limits on large machines. + */ + for_each_possible_cpu(cpu) { + struct scx_kick_pseqs **pseqs = per_cpu_ptr(&scx_kick_pseqs, cpu); + struct scx_kick_pseqs *new_pseqs; + + WARN_ON_ONCE(rcu_access_pointer(*pseqs)); + + new_pseqs = kvzalloc_node(struct_size(new_pseqs, seqs, nr_cpu_ids), + GFP_KERNEL, cpu_to_node(cpu)); + if (!new_pseqs) { + free_kick_pseqs(); + return -ENOMEM; + } + + rcu_assign_pointer(*pseqs, new_pseqs); + } + + return 0; +} + static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops) { struct scx_sched *sch; @@ -4517,15 +4577,19 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) mutex_lock(&scx_enable_mutex); + ret = alloc_kick_pseqs(); + if (ret) + goto err_unlock; + if (scx_enable_state() != SCX_DISABLED) { ret = -EBUSY; - goto err_unlock; + goto err_free_pseqs; } sch = scx_alloc_and_add_sched(ops); if (IS_ERR(sch)) { ret = PTR_ERR(sch); - goto err_unlock; + goto err_free_pseqs; } /* @@ -4728,6 +4792,8 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) return 0; +err_free_pseqs: + free_kick_pseqs(); err_unlock: mutex_unlock(&scx_enable_mutex); return ret; @@ -5109,10 +5175,18 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work) { struct rq *this_rq = this_rq(); struct scx_rq *this_scx = &this_rq->scx; - unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs); + struct scx_kick_pseqs __rcu *pseqs_pcpu = __this_cpu_read(scx_kick_pseqs); bool should_wait = false; + unsigned long *pseqs; s32 cpu; + if (unlikely(!pseqs_pcpu)) { + pr_warn_once("kick_cpus_irq_workfn() called with NULL scx_kick_pseqs"); + return; + } + + pseqs = rcu_dereference_bh(pseqs_pcpu)->seqs; + for_each_cpu(cpu, this_scx->cpus_to_kick) { should_wait |= kick_one_cpu(cpu, this_rq, pseqs); cpumask_clear_cpu(cpu, this_scx->cpus_to_kick); @@ -5235,11 +5309,6 @@ void __init init_sched_ext_class(void) scx_idle_init_masks(); - scx_kick_cpus_pnt_seqs = - __alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids, - __alignof__(scx_kick_cpus_pnt_seqs[0])); - BUG_ON(!scx_kick_cpus_pnt_seqs); - for_each_possible_cpu(cpu) { struct rq *rq = cpu_rq(cpu); int n = cpu_to_node(cpu); From 8607edcd1748503f4f58e66ca0216170f260c79b Mon Sep 17 00:00:00 2001 From: Michal Pecio Date: Tue, 14 Oct 2025 01:55:40 +0300 Subject: [PATCH 038/263] usb: xhci-pci: Fix USB2-only root hub registration A recent change to hide USB3 root hubs of USB2-only controllers broke registration of USB2 root hubs - allow_single_roothub is set too late, and by this time xhci_run() has already deferred root hub registration until after the shared HCD is added, which will never happen. This makes such controllers unusable, but testers didn't notice since they were only bothered by warnings about empty USB3 root hubs. The bug causes problems to other people who actually use such HCs and I was able to confirm it on an ordinary HC by patching to ignore USB3 ports. Setting allow_single_roothub during early setup fixes things. Reported-by: Arisa Snowbell Closes: https://lore.kernel.org/linux-usb/CABpa4MA9unucCoKtSdzJyOLjHNVy+Cwgz5AnAxPkKw6vuox1Nw@mail.gmail.com/ Reported-by: Michal Kubecek Closes: https://lore.kernel.org/linux-usb/lnb5bum7dnzkn3fc7gq6hwigslebo7o4ccflcvsc3lvdgnu7el@fvqpobbdoapl/ Fixes: 719de070f764 ("usb: xhci-pci: add support for hosts with zero USB3 ports") Tested-by: Arisa Snowbell Tested-by: Michal Kubecek Suggested-by: Mathias Nyman Signed-off-by: Michal Pecio Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5c8ab519f497..f67a4d956204 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -582,6 +582,8 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + xhci->allow_single_roothub = 1; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) xhci_pme_acpi_rtd3_enable(pdev); @@ -637,7 +639,6 @@ int xhci_pci_common_probe(struct pci_dev *dev, const struct pci_device_id *id) xhci = hcd_to_xhci(hcd); xhci->reset = reset; - xhci->allow_single_roothub = 1; if (!xhci_has_one_roothub(xhci)) { xhci->shared_hcd = usb_create_shared_hcd(&xhci_pci_hc_driver, &dev->dev, pci_name(dev), hcd); From f3d12ec847b945d5d65846c85f062d07d5e73164 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 14 Oct 2025 01:55:41 +0300 Subject: [PATCH 039/263] xhci: dbc: fix bogus 1024 byte prefix if ttyDBC read races with stall event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DbC may add 1024 bogus bytes to the beginneing of the receiving endpoint if DbC hw triggers a STALL event before any Transfer Blocks (TRBs) for incoming data are queued, but driver handles the event after it queued the TRBs. This is possible as xHCI DbC hardware may trigger spurious STALL transfer events even if endpoint is empty. The STALL event contains a pointer to the stalled TRB, and "remaining" untransferred data length. As there are no TRBs queued yet the STALL event will just point to first TRB position of the empty ring, with '0' bytes remaining untransferred. DbC driver is polling for events, and may not handle the STALL event before /dev/ttyDBC0 is opened and incoming data TRBs are queued. The DbC event handler will now assume the first queued TRB (length 1024) has stalled with '0' bytes remaining untransferred, and copies the data This race situation can be practically mitigated by making sure the event handler handles all pending transfer events when DbC reaches configured state, and only then create dev/ttyDbC0, and start queueing transfers. The event handler can this way detect the STALL events on empty rings and discard them before any transfers are queued. This does in practice solve the issue, but still leaves a small possible gap for the race to trigger. We still need a way to distinguish spurious STALLs on empty rings with '0' bytes remaing, from actual STALL events with all bytes transmitted. Cc: stable Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Tested-by: Łukasz Bartosik Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 63edf2d8f245..023a8ec6f305 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -892,7 +892,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) dev_info(dbc->dev, "DbC configured\n"); portsc = readl(&dbc->regs->portsc); writel(portsc, &dbc->regs->portsc); - return EVT_GSER; + ret = EVT_GSER; + break; } return EVT_DONE; @@ -954,7 +955,8 @@ static enum evtreturn xhci_dbc_do_handle_events(struct xhci_dbc *dbc) break; case TRB_TYPE(TRB_TRANSFER): dbc_handle_xfer_event(dbc, evt); - ret = EVT_XFER_DONE; + if (ret != EVT_GSER) + ret = EVT_XFER_DONE; break; default: break; From 2bbd38fcd29670e46c0fdb9cd0e90507a8a1bf6a Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Tue, 14 Oct 2025 01:55:42 +0300 Subject: [PATCH 040/263] xhci: dbc: enable back DbC in resume if it was enabled before suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DbC is currently only enabled back if it's in configured state during suspend. If system is suspended after DbC is enabled, but before the device is properly enumerated by the host, then DbC would not be enabled back in resume. Always enable DbC back in resume if it's suspended in enabled, connected, or configured state Cc: stable Fixes: dfba2174dc42 ("usb: xhci: Add DbC support in xHCI driver") Tested-by: Łukasz Bartosik Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index 023a8ec6f305..ecda964e018a 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -1392,8 +1392,15 @@ int xhci_dbc_suspend(struct xhci_hcd *xhci) if (!dbc) return 0; - if (dbc->state == DS_CONFIGURED) + switch (dbc->state) { + case DS_ENABLED: + case DS_CONNECTED: + case DS_CONFIGURED: dbc->resume_required = 1; + break; + default: + break; + } xhci_dbc_stop(dbc); From 05e63305c85c88141500f0a2fb02afcfba9396e1 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Mon, 13 Oct 2025 22:36:34 +0200 Subject: [PATCH 041/263] sched_ext: Fix scx_kick_pseqs corruption on concurrent scheduler loads If we load a BPF scheduler while another scheduler is already running, alloc_kick_pseqs() would be called again, overwriting the previously allocated arrays. Fix by moving the alloc_kick_pseqs() call after the scx_enable_state() check, ensuring that the arrays are only allocated when a scheduler can actually be loaded. Fixes: 14c1da3895a11 ("sched_ext: Allocate scx_kick_cpus_pnt_seqs lazily using kvzalloc()") Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index c645d47124e7..12c9c3595692 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -4577,15 +4577,15 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link) mutex_lock(&scx_enable_mutex); + if (scx_enable_state() != SCX_DISABLED) { + ret = -EBUSY; + goto err_unlock; + } + ret = alloc_kick_pseqs(); if (ret) goto err_unlock; - if (scx_enable_state() != SCX_DISABLED) { - ret = -EBUSY; - goto err_free_pseqs; - } - sch = scx_alloc_and_add_sched(ops); if (IS_ERR(sch)) { ret = PTR_ERR(sch); From 6b6e03106163458716c47df2baa9ad08ed4ddb0e Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 15 Oct 2025 15:36:59 +0800 Subject: [PATCH 042/263] spi: amlogic: fix spifc build error There is an error building when Compiler version: gcc (GCC) 14.3.0 Assembler version: GNU assembler (GNU Binutils) 2.44 " Error log: WARNING: modpost: missing MODULE_DESCRIPTION() in arch/arm/probes/kprobes/test-kprobes.o ERROR: modpost: "__ffsdi2" [drivers/spi/spi-amlogic-spifc-a4.ko] undefined! " Use __ffs API instead of __bf_shf to be safer. Reported-by: Guenter Roeck Closes: https://lore.kernel.org/all/f594c621-f9e1-49f2-af31-23fbcb176058@roeck-us.net/ Fixes: 4670db6f32e9 ("spi: amlogic: add driver for Amlogic SPI Flash Controller") Signed-off-by: Xianwei Zhao Link: https://patch.msgid.link/20251015-fix-spifc-a4-v1-1-08e0900e5b7e@amlogic.com Signed-off-by: Mark Brown --- drivers/spi/spi-amlogic-spifc-a4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-amlogic-spifc-a4.c b/drivers/spi/spi-amlogic-spifc-a4.c index 4338d00e56a6..35a7c4965e11 100644 --- a/drivers/spi/spi-amlogic-spifc-a4.c +++ b/drivers/spi/spi-amlogic-spifc-a4.c @@ -286,7 +286,7 @@ static int aml_sfc_set_bus_width(struct aml_sfc *sfc, u8 buswidth, u32 mask) for (i = 0; i <= LANE_MAX; i++) { if (buswidth == 1 << i) { - conf = i << __bf_shf(mask); + conf = i << __ffs(mask); return regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, mask, conf); } @@ -566,7 +566,7 @@ static int aml_sfc_raw_io_op(struct aml_sfc *sfc, const struct spi_mem_op *op) if (!op->data.nbytes) goto end_xfer; - conf = (op->data.nbytes >> RAW_SIZE_BW) << __bf_shf(RAW_EXT_SIZE); + conf = (op->data.nbytes >> RAW_SIZE_BW) << __ffs(RAW_EXT_SIZE); ret = regmap_update_bits(sfc->regmap_base, SFC_SPI_CFG, RAW_EXT_SIZE, conf); if (ret) goto err_out; From 2290ab43b9d8eafb8046387f10a8dfa2b030ba46 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:44 +0100 Subject: [PATCH 043/263] firmware: arm_scmi: Account for failed debug initialization When the SCMI debug subsystem fails to initialize, the related debug root will be missing, and the underlying descriptor will be NULL. Handle this fault condition in the SCMI debug helpers that maintain metrics counters. Fixes: 0b3d48c4726e ("firmware: arm_scmi: Track basic SCMI communication debug metrics") Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-1-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 24 ++++++++++++++-- drivers/firmware/arm_scmi/driver.c | 44 ++++++++++-------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 07b9e629276d..21c0b95027c6 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -309,10 +309,28 @@ enum debug_counters { SCMI_DEBUG_COUNTERS_LAST }; -static inline void scmi_inc_count(atomic_t *arr, int stat) +/** + * struct scmi_debug_info - Debug common info + * @top_dentry: A reference to the top debugfs dentry + * @name: Name of this SCMI instance + * @type: Type of this SCMI instance + * @is_atomic: Flag to state if the transport of this instance is atomic + * @counters: An array of atomic_c's used for tracking statistics (if enabled) + */ +struct scmi_debug_info { + struct dentry *top_dentry; + const char *name; + const char *type; + bool is_atomic; + atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; +}; + +static inline void scmi_inc_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_inc(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_inc(&dbg->counters[stat]); + } } static inline void scmi_dec_count(atomic_t *arr, int stat) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index bd56a877fdfc..56419285c0bf 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -115,22 +115,6 @@ struct scmi_protocol_instance { #define ph_to_pi(h) container_of(h, struct scmi_protocol_instance, ph) -/** - * struct scmi_debug_info - Debug common info - * @top_dentry: A reference to the top debugfs dentry - * @name: Name of this SCMI instance - * @type: Type of this SCMI instance - * @is_atomic: Flag to state if the transport of this instance is atomic - * @counters: An array of atomic_c's used for tracking statistics (if enabled) - */ -struct scmi_debug_info { - struct dentry *top_dentry; - const char *name; - const char *type; - bool is_atomic; - atomic_t counters[SCMI_DEBUG_COUNTERS_LAST]; -}; - /** * struct scmi_info - Structure representing a SCMI instance * @@ -1034,7 +1018,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) spin_unlock_irqrestore(&minfo->xfer_lock, flags); scmi_bad_message_trace(cinfo, msg_hdr, MSG_UNEXPECTED); - scmi_inc_count(info->dbg->counters, ERR_MSG_UNEXPECTED); + scmi_inc_count(info->dbg, ERR_MSG_UNEXPECTED); return xfer; } @@ -1062,7 +1046,7 @@ scmi_xfer_command_acquire(struct scmi_chan_info *cinfo, u32 msg_hdr) msg_type, xfer_id, msg_hdr, xfer->state); scmi_bad_message_trace(cinfo, msg_hdr, MSG_INVALID); - scmi_inc_count(info->dbg->counters, ERR_MSG_INVALID); + scmi_inc_count(info->dbg, ERR_MSG_INVALID); /* On error the refcount incremented above has to be dropped */ __scmi_xfer_put(minfo, xfer); @@ -1107,7 +1091,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, PTR_ERR(xfer)); scmi_bad_message_trace(cinfo, msg_hdr, MSG_NOMEM); - scmi_inc_count(info->dbg->counters, ERR_MSG_NOMEM); + scmi_inc_count(info->dbg, ERR_MSG_NOMEM); scmi_clear_channel(info, cinfo); return; @@ -1123,7 +1107,7 @@ static void scmi_handle_notification(struct scmi_chan_info *cinfo, trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "NOTI", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, NOTIFICATION_OK); + scmi_inc_count(info->dbg, NOTIFICATION_OK); scmi_notify(cinfo->handle, xfer->hdr.protocol_id, xfer->hdr.id, xfer->rx.buf, xfer->rx.len, ts); @@ -1183,10 +1167,10 @@ static void scmi_handle_response(struct scmi_chan_info *cinfo, if (xfer->hdr.type == MSG_TYPE_DELAYED_RESP) { scmi_clear_channel(info, cinfo); complete(xfer->async_done); - scmi_inc_count(info->dbg->counters, DELAYED_RESPONSE_OK); + scmi_inc_count(info->dbg, DELAYED_RESPONSE_OK); } else { complete(&xfer->done); - scmi_inc_count(info->dbg->counters, RESPONSE_OK); + scmi_inc_count(info->dbg, RESPONSE_OK); } if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { @@ -1296,7 +1280,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "timed out in resp(caller: %pS) - polling\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_POLLED_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_POLLED_TIMEOUT); } } @@ -1321,7 +1305,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, "RESP" : "resp", xfer->hdr.seq, xfer->hdr.status, xfer->rx.buf, xfer->rx.len); - scmi_inc_count(info->dbg->counters, RESPONSE_POLLED_OK); + scmi_inc_count(info->dbg, RESPONSE_POLLED_OK); if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { scmi_raw_message_report(info->raw, xfer, @@ -1336,7 +1320,7 @@ static int scmi_wait_for_reply(struct device *dev, const struct scmi_desc *desc, dev_err(dev, "timed out in resp(caller: %pS)\n", (void *)_RET_IP_); ret = -ETIMEDOUT; - scmi_inc_count(info->dbg->counters, XFERS_RESPONSE_TIMEOUT); + scmi_inc_count(info->dbg, XFERS_RESPONSE_TIMEOUT); } } @@ -1420,13 +1404,13 @@ static int do_xfer(const struct scmi_protocol_handle *ph, !is_transport_polling_capable(info->desc)) { dev_warn_once(dev, "Polling mode is not supported by transport.\n"); - scmi_inc_count(info->dbg->counters, SENT_FAIL_POLLING_UNSUPPORTED); + scmi_inc_count(info->dbg, SENT_FAIL_POLLING_UNSUPPORTED); return -EINVAL; } cinfo = idr_find(&info->tx_idr, pi->proto->id); if (unlikely(!cinfo)) { - scmi_inc_count(info->dbg->counters, SENT_FAIL_CHANNEL_NOT_FOUND); + scmi_inc_count(info->dbg, SENT_FAIL_CHANNEL_NOT_FOUND); return -EINVAL; } /* True ONLY if also supported by transport. */ @@ -1461,19 +1445,19 @@ static int do_xfer(const struct scmi_protocol_handle *ph, ret = info->desc->ops->send_message(cinfo, xfer); if (ret < 0) { dev_dbg(dev, "Failed to send message %d\n", ret); - scmi_inc_count(info->dbg->counters, SENT_FAIL); + scmi_inc_count(info->dbg, SENT_FAIL); return ret; } trace_scmi_msg_dump(info->id, cinfo->id, xfer->hdr.protocol_id, xfer->hdr.id, "CMND", xfer->hdr.seq, xfer->hdr.status, xfer->tx.buf, xfer->tx.len); - scmi_inc_count(info->dbg->counters, SENT_OK); + scmi_inc_count(info->dbg, SENT_OK); ret = scmi_wait_for_message_response(cinfo, xfer); if (!ret && xfer->hdr.status) { ret = scmi_to_linux_errno(xfer->hdr.status); - scmi_inc_count(info->dbg->counters, ERR_PROTOCOL); + scmi_inc_count(info->dbg, ERR_PROTOCOL); } if (info->desc->ops->mark_txdone) From 289ce7e9a5e1a52ac7e522a3e389dc16be08d7a4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:45 +0100 Subject: [PATCH 044/263] include: trace: Fix inflight count helper on failed initialization Add a check to the scmi_inflight_count() helper to handle the case when the SCMI debug subsystem fails to initialize. Fixes: f8e656382b4a ("include: trace: Add tracepoint support for inflight xfer count") Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-2-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/common.h | 8 +++++--- drivers/firmware/arm_scmi/driver.c | 7 +++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/firmware/arm_scmi/common.h b/drivers/firmware/arm_scmi/common.h index 21c0b95027c6..7c35c95fddba 100644 --- a/drivers/firmware/arm_scmi/common.h +++ b/drivers/firmware/arm_scmi/common.h @@ -333,10 +333,12 @@ static inline void scmi_inc_count(struct scmi_debug_info *dbg, int stat) } } -static inline void scmi_dec_count(atomic_t *arr, int stat) +static inline void scmi_dec_count(struct scmi_debug_info *dbg, int stat) { - if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) - atomic_dec(&arr[stat]); + if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { + if (dbg) + atomic_dec(&dbg->counters[stat]); + } } enum scmi_bad_msg { diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 56419285c0bf..1cd15412024c 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -594,7 +594,7 @@ scmi_xfer_inflight_register_unlocked(struct scmi_xfer *xfer, /* Set in-flight */ set_bit(xfer->hdr.seq, minfo->xfer_alloc_table); hash_add(minfo->pending_xfers, &xfer->node, xfer->hdr.seq); - scmi_inc_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_inc_count(info->dbg, XFERS_INFLIGHT); xfer->pending = true; } @@ -803,7 +803,7 @@ __scmi_xfer_put(struct scmi_xfers_info *minfo, struct scmi_xfer *xfer) hash_del(&xfer->node); xfer->pending = false; - scmi_dec_count(info->dbg->counters, XFERS_INFLIGHT); + scmi_dec_count(info->dbg, XFERS_INFLIGHT); } hlist_add_head(&xfer->node, &minfo->free_xfers); } @@ -3407,6 +3407,9 @@ int scmi_inflight_count(const struct scmi_handle *handle) if (IS_ENABLED(CONFIG_ARM_SCMI_DEBUG_COUNTERS)) { struct scmi_info *info = handle_to_scmi_info(handle); + if (!info->dbg) + return 0; + return atomic_read(&info->dbg->counters[XFERS_INFLIGHT]); } else { return 0; From 092b9e2ce6dd63d2f36822751a51957412706986 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Tue, 14 Oct 2025 12:53:46 +0100 Subject: [PATCH 045/263] firmware: arm_scmi: Skip RAW initialization on failure Avoid attempting to initialize RAW mode when the debug subsystem itself has failed to initialize, since doing so is pointless and emits misleading error messages. Signed-off-by: Cristian Marussi Message-Id: <20251014115346.2391418-3-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1cd15412024c..eb46694cb14b 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -3028,9 +3028,6 @@ static int scmi_debugfs_raw_mode_setup(struct scmi_info *info) u8 channels[SCMI_MAX_CHANNELS] = {}; DECLARE_BITMAP(protos, SCMI_MAX_CHANNELS) = {}; - if (!info->dbg) - return -EINVAL; - /* Enumerate all channels to collect their ids */ idr_for_each_entry(&info->tx_idr, cinfo, id) { /* @@ -3202,7 +3199,7 @@ static int scmi_probe(struct platform_device *pdev) if (!info->dbg) dev_warn(dev, "Failed to setup SCMI debugfs.\n"); - if (IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { + if (info->dbg && IS_ENABLED(CONFIG_ARM_SCMI_RAW_MODE_SUPPORT)) { ret = scmi_debugfs_raw_mode_setup(info); if (!coex) { if (ret) From 20b93a0088a595bceed4a026d527cbbac4e876c5 Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Wed, 8 Oct 2025 12:10:57 +0300 Subject: [PATCH 046/263] firmware: arm_scmi: Fix premature SCMI_XFER_FLAG_IS_RAW clearing in raw mode The SCMI_XFER_FLAG_IS_RAW flag was being cleared prematurely in scmi_xfer_raw_put() before the transfer completion was properly acknowledged by the raw message handlers. Move the clearing of SCMI_XFER_FLAG_IS_RAW and SCMI_XFER_FLAG_CHAN_SET from scmi_xfer_raw_put() to __scmi_xfer_put() to ensure the flags remain set throughout the entire raw message processing pipeline until the transfer is returned to the free pool. Fixes: 3095a3e25d8f ("firmware: arm_scmi: Add xfer helpers to provide raw access") Suggested-by: Cristian Marussi Signed-off-by: Artem Shimko Reviewed-by: Cristian Marussi Message-Id: <20251008091057.1969260-1-a.shimko.dev@gmail.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/driver.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index eb46694cb14b..5caa9191a8d1 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -805,6 +805,7 @@ __scmi_xfer_put(struct scmi_xfers_info *minfo, struct scmi_xfer *xfer) scmi_dec_count(info->dbg, XFERS_INFLIGHT); } + xfer->flags = 0; hlist_add_head(&xfer->node, &minfo->free_xfers); } spin_unlock_irqrestore(&minfo->xfer_lock, flags); @@ -823,8 +824,6 @@ void scmi_xfer_raw_put(const struct scmi_handle *handle, struct scmi_xfer *xfer) { struct scmi_info *info = handle_to_scmi_info(handle); - xfer->flags &= ~SCMI_XFER_FLAG_IS_RAW; - xfer->flags &= ~SCMI_XFER_FLAG_CHAN_SET; return __scmi_xfer_put(&info->tx_minfo, xfer); } From 4314ffce4eb81a6c18700af1b6e29b6e0c6b9e37 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:52 +0300 Subject: [PATCH 047/263] spi: airoha: return an error for continuous mode dirmap creation cases This driver can accelerate single page operations only, thus continuous reading mode should not be used. Continuous reading will use sizes up to the size of one erase block. This size is much larger than the size of single flash page. Use this difference to identify continuous reading and return an error. Signed-off-by: Mikhail Kshevetskiy Reviewed-by: Frieder Schrempf Reviewed-by: AngeloGioacchino Del Regno Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Link: https://patch.msgid.link/20251012121707.2296160-2-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index dbe640986825..043a03cd90a1 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -618,6 +618,10 @@ static int airoha_snand_dirmap_create(struct spi_mem_dirmap_desc *desc) if (desc->info.offset + desc->info.length > U32_MAX) return -EINVAL; + /* continuous reading is not supported */ + if (desc->info.length > SPI_NAND_CACHE_SIZE) + return -E2BIG; + if (!airoha_snand_supports_op(desc->mem, &desc->info.op_tmpl)) return -EOPNOTSUPP; From edd2e261b1babb92213089b5feadca12e3459322 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:54 +0300 Subject: [PATCH 048/263] spi: airoha: add support of dual/quad wires spi modes to exec_op() handler Booting without this patch and disabled dirmap support results in [ 2.980719] spi-nand spi0.0: Micron SPI NAND was found. [ 2.986040] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 2.994709] 2 fixed-partitions partitions found on MTD device spi0.0 [ 3.001075] Creating 2 MTD partitions on "spi0.0": [ 3.005862] 0x000000000000-0x000000020000 : "bl2" [ 3.011272] 0x000000020000-0x000010000000 : "ubi" ... [ 6.195594] ubi0: attaching mtd1 [ 13.338398] ubi0: scanning is finished [ 13.342188] ubi0 error: ubi_read_volume_table: the layout volume was not found [ 13.349784] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1, error -22 [ 13.356897] UBI error: cannot attach mtd1 If dirmap is disabled or not supported in the spi driver, the dirmap requests will be executed via exec_op() handler. Thus, if the hardware supports dual/quad spi modes, then corresponding requests will be sent to exec_op() handler. Current driver does not support such requests, so error is arrised. As result the flash can't be read/write. This patch adds support of dual and quad wires spi modes to exec_op() handler. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-4-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 108 ++++++++++++++++++++++++++-------- 1 file changed, 82 insertions(+), 26 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 043a03cd90a1..0b89dc42545b 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -192,6 +192,14 @@ #define SPI_NAND_OP_RESET 0xff #define SPI_NAND_OP_DIE_SELECT 0xc2 +/* SNAND FIFO commands */ +#define SNAND_FIFO_TX_BUSWIDTH_SINGLE 0x08 +#define SNAND_FIFO_TX_BUSWIDTH_DUAL 0x09 +#define SNAND_FIFO_TX_BUSWIDTH_QUAD 0x0a +#define SNAND_FIFO_RX_BUSWIDTH_SINGLE 0x0c +#define SNAND_FIFO_RX_BUSWIDTH_DUAL 0x0e +#define SNAND_FIFO_RX_BUSWIDTH_QUAD 0x0f + #define SPI_NAND_CACHE_SIZE (SZ_4K + SZ_256) #define SPI_MAX_TRANSFER_SIZE 511 @@ -387,10 +395,26 @@ static int airoha_snand_set_mode(struct airoha_snand_ctrl *as_ctrl, return regmap_write(as_ctrl->regmap_ctrl, REG_SPI_CTRL_DUMMY, 0); } -static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, - const u8 *data, int len) +static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, + const u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_TX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_TX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_TX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; @@ -409,16 +433,32 @@ static int airoha_snand_write_data(struct airoha_snand_ctrl *as_ctrl, u8 cmd, return 0; } -static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, u8 *data, - int len) +static int airoha_snand_read_data(struct airoha_snand_ctrl *as_ctrl, + u8 *data, int len, int buswidth) { int i, data_len; + u8 cmd; + + switch (buswidth) { + case 0: + case 1: + cmd = SNAND_FIFO_RX_BUSWIDTH_SINGLE; + break; + case 2: + cmd = SNAND_FIFO_RX_BUSWIDTH_DUAL; + break; + case 4: + cmd = SNAND_FIFO_RX_BUSWIDTH_QUAD; + break; + default: + return -EINVAL; + } for (i = 0; i < len; i += data_len) { int err; data_len = min(len - i, SPI_MAX_TRANSFER_SIZE); - err = airoha_snand_set_fifo_op(as_ctrl, 0xc, data_len); + err = airoha_snand_set_fifo_op(as_ctrl, cmd, data_len); if (err) return err; @@ -902,12 +942,28 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, static int airoha_snand_exec_op(struct spi_mem *mem, const struct spi_mem_op *op) { - u8 data[8], cmd, opcode = op->cmd.opcode; struct airoha_snand_ctrl *as_ctrl; + int op_len, addr_len, dummy_len; + u8 buf[20], *data; int i, err; as_ctrl = spi_controller_get_devdata(mem->spi->controller); + op_len = op->cmd.nbytes; + addr_len = op->addr.nbytes; + dummy_len = op->dummy.nbytes; + + if (op_len + dummy_len + addr_len > sizeof(buf)) + return -EIO; + + data = buf; + for (i = 0; i < op_len; i++) + *data++ = op->cmd.opcode >> (8 * (op_len - i - 1)); + for (i = 0; i < addr_len; i++) + *data++ = op->addr.val >> (8 * (addr_len - i - 1)); + for (i = 0; i < dummy_len; i++) + *data++ = 0xff; + /* switch to manual mode */ err = airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); if (err < 0) @@ -918,40 +974,40 @@ static int airoha_snand_exec_op(struct spi_mem *mem, return err; /* opcode */ - err = airoha_snand_write_data(as_ctrl, 0x8, &opcode, sizeof(opcode)); + data = buf; + err = airoha_snand_write_data(as_ctrl, data, op_len, + op->cmd.buswidth); if (err) return err; /* addr part */ - cmd = opcode == SPI_NAND_OP_GET_FEATURE ? 0x11 : 0x8; - put_unaligned_be64(op->addr.val, data); - - for (i = ARRAY_SIZE(data) - op->addr.nbytes; - i < ARRAY_SIZE(data); i++) { - err = airoha_snand_write_data(as_ctrl, cmd, &data[i], - sizeof(data[0])); + data += op_len; + if (addr_len) { + err = airoha_snand_write_data(as_ctrl, data, addr_len, + op->addr.buswidth); if (err) return err; } /* dummy */ - data[0] = 0xff; - for (i = 0; i < op->dummy.nbytes; i++) { - err = airoha_snand_write_data(as_ctrl, 0x8, &data[0], - sizeof(data[0])); + data += addr_len; + if (dummy_len) { + err = airoha_snand_write_data(as_ctrl, data, dummy_len, + op->dummy.buswidth); if (err) return err; } /* data */ - if (op->data.dir == SPI_MEM_DATA_IN) { - err = airoha_snand_read_data(as_ctrl, op->data.buf.in, - op->data.nbytes); - if (err) - return err; - } else { - err = airoha_snand_write_data(as_ctrl, 0x8, op->data.buf.out, - op->data.nbytes); + if (op->data.nbytes) { + if (op->data.dir == SPI_MEM_DATA_IN) + err = airoha_snand_read_data(as_ctrl, op->data.buf.in, + op->data.nbytes, + op->data.buswidth); + else + err = airoha_snand_write_data(as_ctrl, op->data.buf.out, + op->data.nbytes, + op->data.buswidth); if (err) return err; } From 20d7b236b78c7ec685a22db5689b9c829975e0c3 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:56 +0300 Subject: [PATCH 049/263] spi: airoha: switch back to non-dma mode in the case of error Current dirmap code does not switch back to non-dma mode in the case of error. This is wrong. This patch fixes dirmap read/write error path. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Acked-by: Lorenzo Bianconi Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-6-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 0b89dc42545b..8143fbb0cf4e 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -698,13 +698,13 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, err = airoha_snand_nfi_config(as_ctrl); if (err) - return err; + goto error_dma_mode_off; dma_addr = dma_map_single(as_ctrl->dev, txrx_buf, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); err = dma_mapping_error(as_ctrl->dev, dma_addr); if (err) - return err; + goto error_dma_mode_off; /* set dma addr */ err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_STRADDR, @@ -804,6 +804,8 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_FROM_DEVICE); +error_dma_mode_off: + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } @@ -936,6 +938,7 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, error_dma_unmap: dma_unmap_single(as_ctrl->dev, dma_addr, SPI_NAND_CACHE_SIZE, DMA_TO_DEVICE); + airoha_snand_set_mode(as_ctrl, SPI_MODE_MANUAL); return err; } From 0b7d9b25e4bc2e478c9d06281a65f930769fca09 Mon Sep 17 00:00:00 2001 From: Mikhail Kshevetskiy Date: Sun, 12 Oct 2025 15:16:57 +0300 Subject: [PATCH 050/263] spi: airoha: fix reading/writing of flashes with more than one plane per lun Attaching UBI on the flash with more than one plane per lun will lead to the following error: [ 2.980989] spi-nand spi0.0: Micron SPI NAND was found. [ 2.986309] spi-nand spi0.0: 256 MiB, block size: 128 KiB, page size: 2048, OOB size: 128 [ 2.994978] 2 fixed-partitions partitions found on MTD device spi0.0 [ 3.001350] Creating 2 MTD partitions on "spi0.0": [ 3.006159] 0x000000000000-0x000000020000 : "bl2" [ 3.011663] 0x000000020000-0x000010000000 : "ubi" ... [ 6.391748] ubi0: attaching mtd1 [ 6.412545] ubi0 error: ubi_attach: PEB 0 contains corrupted VID header, and the data does not contain all 0xFF [ 6.422677] ubi0 error: ubi_attach: this may be a non-UBI PEB or a severe VID header corruption which requires manual inspection [ 6.434249] Volume identifier header dump: [ 6.438349] magic 55424923 [ 6.441482] version 1 [ 6.444007] vol_type 0 [ 6.446539] copy_flag 0 [ 6.449068] compat 0 [ 6.451594] vol_id 0 [ 6.454120] lnum 1 [ 6.456651] data_size 4096 [ 6.459442] used_ebs 1061644134 [ 6.462748] data_pad 0 [ 6.465274] sqnum 0 [ 6.467805] hdr_crc 61169820 [ 6.470943] Volume identifier header hexdump: [ 6.475308] hexdump of PEB 0 offset 4096, length 126976 [ 6.507391] ubi0 warning: ubi_attach: valid VID header but corrupted EC header at PEB 4 [ 6.515415] ubi0 error: ubi_compare_lebs: unsupported on-flash UBI format [ 6.522222] ubi0 error: ubi_attach_mtd_dev: failed to attach mtd1, error -22 [ 6.529294] UBI error: cannot attach mtd1 Non dirmap reading works good. Looking to spi_mem_no_dirmap_read() code we'll see: static ssize_t spi_mem_no_dirmap_read(struct spi_mem_dirmap_desc *desc, u64 offs, size_t len, void *buf) { struct spi_mem_op op = desc->info.op_tmpl; int ret; // --- see here --- op.addr.val = desc->info.offset + offs; //----------------- op.data.buf.in = buf; op.data.nbytes = len; ret = spi_mem_adjust_op_size(desc->mem, &op); if (ret) return ret; ret = spi_mem_exec_op(desc->mem, &op); if (ret) return ret; return op.data.nbytes; } The similar happens for spi_mem_no_dirmap_write(). Thus the address passed to the flash should take in the account the value of desc->info.offset. This patch fix dirmap reading/writing of flashes with more than one plane per lun. Fixes: a403997c12019 ("spi: airoha: add SPI-NAND Flash controller driver") Signed-off-by: Mikhail Kshevetskiy Reviewed-by: AngeloGioacchino Del Regno Link: https://patch.msgid.link/20251012121707.2296160-7-mikhail.kshevetskiy@iopsys.eu Signed-off-by: Mark Brown --- drivers/spi/spi-airoha-snfi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-airoha-snfi.c b/drivers/spi/spi-airoha-snfi.c index 8143fbb0cf4e..b78163eaed61 100644 --- a/drivers/spi/spi-airoha-snfi.c +++ b/drivers/spi/spi-airoha-snfi.c @@ -733,8 +733,9 @@ static ssize_t airoha_snand_dirmap_read(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - /* set read addr */ - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, 0x0); + /* set read addr: zero page offset + descriptor read offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_RD_CTL3, + desc->info.offset); if (err) goto error_dma_unmap; @@ -870,7 +871,9 @@ static ssize_t airoha_snand_dirmap_write(struct spi_mem_dirmap_desc *desc, if (err) goto error_dma_unmap; - err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, 0x0); + /* set write addr: zero page offset + descriptor write offset */ + err = regmap_write(as_ctrl->regmap_nfi, REG_SPI_NFI_PG_CTL2, + desc->info.offset); if (err) goto error_dma_unmap; From c700e7279b29948f5d2aee30df2dbd3124df3b9c Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Fri, 10 Oct 2025 10:31:41 -0700 Subject: [PATCH 051/263] drm/rockchip: dw_hdmi: use correct SCLIN mask for RK3228 In dw_hdmi_rk3228_setup_hpd(), the SCLIN mask incorrectly references the RK3328 variant. This change updates it to the RK3228-specific mask RK3228_HDMI_SCLIN_MSK using FIELD_PREP_WM16, ensuring proper HPD and I2C pin configuration for RK3228. Change: RK3328_HDMI_SCLIN_MSK -> RK3228_HDMI_SCLIN_MSK Fixes: 63df37f3fc71 ("drm/rockchip: dw_hdmi: switch to FIELD_PREP_WM16* macros") Signed-off-by: Alok Tiwari Reviewed-by: Nicolas Frattaroli Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20251010173143.72733-1-alok.a.tiwari@oracle.com --- drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 7b613997bb50..727cdf768161 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -361,7 +361,7 @@ static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data) regmap_write(hdmi->regmap, RK3228_GRF_SOC_CON2, FIELD_PREP_WM16(RK3228_HDMI_SDAIN_MSK, 1) | - FIELD_PREP_WM16(RK3328_HDMI_SCLIN_MSK, 1)); + FIELD_PREP_WM16(RK3228_HDMI_SCLIN_MSK, 1)); } static enum drm_connector_status From a3c4a0a42e61aad1056a3d33fd603c1ae66d4288 Mon Sep 17 00:00:00 2001 From: Emil Tsalapatis Date: Thu, 16 Oct 2025 11:11:26 -0700 Subject: [PATCH 052/263] sched_ext: fix flag check for deferred callbacks When scheduling the deferred balance callbacks, check SCX_RQ_BAL_CB_PENDING instead of SCX_RQ_BAL_PENDING. This way schedule_deferred() properly tests whether there is already a pending request for queue_balance_callback() to be invoked at the end of .balance(). Fixes: a8ad873113d3 ("sched_ext: defer queue_balance_callback() until after ops.dispatch") Signed-off-by: Emil Tsalapatis Signed-off-by: Tejun Heo --- kernel/sched/ext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 12c9c3595692..ecb251e883ea 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -792,7 +792,7 @@ static void schedule_deferred(struct rq *rq) return; /* Don't do anything if there already is a deferred operation. */ - if (rq->scx.flags & SCX_RQ_BAL_PENDING) + if (rq->scx.flags & SCX_RQ_BAL_CB_PENDING) return; /* From 74b84d1be0220b99405c16a4a3e1e503e3bd8387 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 7 Oct 2025 11:43:12 +0200 Subject: [PATCH 053/263] driver core: fw_devlink: Don't warn about sync_state() pending Due to the wider deployment of the ->sync_state() support, for PM domains for example, we are receiving reports about the sync_state() pending message that is being logged in fw_devlink_dev_sync_state(). In particular as it's printed at the warning level, which is questionable. Even if it certainly is useful to know that the ->sync_state() condition could not be met, there may be nothing wrong with it. For example, a driver may be built as module and are still waiting to be initialized/probed. For this reason let's move to the info level for now. Reported-by: Geert Uytterhoeven Reported-by: Sebin Francis Reported-by: Diederik de Haas Reported-by: Jon Hunter Reviewed-by: Tomi Valkeinen Signed-off-by: Ulf Hansson Reviewed-by: Dhruva Gole Reviewed-by: Sebastian Reichel Reviewed-by: Kevin Hilman Acked-by: Saravana Kannan Reviewed-by: Sebin Francis Tested-by: Sebin Francis Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3c533dab8fa5..f69dc9c85954 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1784,7 +1784,7 @@ static int fw_devlink_dev_sync_state(struct device *dev, void *data) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { - dev_warn(sup, "sync_state() pending due to %s\n", + dev_info(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } From a91c8096590bd7801a26454789f2992094fe36da Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 23 Jul 2025 16:24:16 +0200 Subject: [PATCH 054/263] devcoredump: Fix circular locking dependency with devcd->mutex. The original code causes a circular locking dependency found by lockdep. ====================================================== WARNING: possible circular locking dependency detected 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 Tainted: G S U ------------------------------------------------------ xe_fault_inject/5091 is trying to acquire lock: ffff888156815688 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}, at: __flush_work+0x25d/0x660 but task is already holding lock: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&devcd->mutex){+.+.}-{3:3}: mutex_lock_nested+0x4e/0xc0 devcd_data_write+0x27/0x90 sysfs_kf_bin_write+0x80/0xf0 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (kn->active#236){++++}-{0:0}: kernfs_drain+0x1e2/0x200 __kernfs_remove+0xae/0x400 kernfs_remove_by_name_ns+0x5d/0xc0 remove_files+0x54/0x70 sysfs_remove_group+0x3d/0xa0 sysfs_remove_groups+0x2e/0x60 device_remove_attrs+0xc7/0x100 device_del+0x15d/0x3b0 devcd_del+0x19/0x30 process_one_work+0x22b/0x6f0 worker_thread+0x1e8/0x3d0 kthread+0x11c/0x250 ret_from_fork+0x26c/0x2e0 ret_from_fork_asm+0x1a/0x30 -> #0 ((work_completion)(&(&devcd->del_wk)->work)){+.+.}-{0:0}: __lock_acquire+0x1661/0x2860 lock_acquire+0xc4/0x2f0 __flush_work+0x27a/0x660 flush_delayed_work+0x5d/0xa0 dev_coredump_put+0x63/0xa0 xe_driver_devcoredump_fini+0x12/0x20 [xe] devm_action_release+0x12/0x30 release_nodes+0x3a/0x120 devres_release_all+0x8a/0xd0 device_unbind_cleanup+0x12/0x80 device_release_driver_internal+0x23a/0x280 device_driver_detach+0x14/0x20 unbind_store+0xaf/0xc0 drv_attr_store+0x21/0x50 sysfs_kf_write+0x4a/0x80 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 entry_SYSCALL_64_after_hwframe+0x76/0x7e other info that might help us debug this: Chain exists of: (work_completion)(&(&devcd->del_wk)->work) --> kn->active#236 --> &devcd->mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&devcd->mutex); lock(kn->active#236); lock(&devcd->mutex); lock((work_completion)(&(&devcd->del_wk)->work)); *** DEADLOCK *** 5 locks held by xe_fault_inject/5091: #0: ffff8881129f9488 (sb_writers#5){.+.+}-{0:0}, at: ksys_write+0x72/0xf0 #1: ffff88810c755078 (&of->mutex#2){+.+.}-{3:3}, at: kernfs_fop_write_iter+0x123/0x220 #2: ffff8881054811a0 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x55/0x280 #3: ffff888156815620 (&devcd->mutex){+.+.}-{3:3}, at: dev_coredump_put+0x3f/0xa0 #4: ffffffff8359e020 (rcu_read_lock){....}-{1:2}, at: __flush_work+0x72/0x660 stack backtrace: CPU: 14 UID: 0 PID: 5091 Comm: xe_fault_inject Tainted: G S U 6.16.0-rc6-lgci-xe-xe-pw-151626v3+ #1 PREEMPT_{RT,(lazy)} Tainted: [S]=CPU_OUT_OF_SPEC, [U]=USER Hardware name: Micro-Star International Co., Ltd. MS-7D25/PRO Z690-A DDR4(MS-7D25), BIOS 1.10 12/13/2021 Call Trace: dump_stack_lvl+0x91/0xf0 dump_stack+0x10/0x20 print_circular_bug+0x285/0x360 check_noncircular+0x135/0x150 ? register_lock_class+0x48/0x4a0 __lock_acquire+0x1661/0x2860 lock_acquire+0xc4/0x2f0 ? __flush_work+0x25d/0x660 ? mark_held_locks+0x46/0x90 ? __flush_work+0x25d/0x660 __flush_work+0x27a/0x660 ? __flush_work+0x25d/0x660 ? trace_hardirqs_on+0x1e/0xd0 ? __pfx_wq_barrier_func+0x10/0x10 flush_delayed_work+0x5d/0xa0 dev_coredump_put+0x63/0xa0 xe_driver_devcoredump_fini+0x12/0x20 [xe] devm_action_release+0x12/0x30 release_nodes+0x3a/0x120 devres_release_all+0x8a/0xd0 device_unbind_cleanup+0x12/0x80 device_release_driver_internal+0x23a/0x280 ? bus_find_device+0xa8/0xe0 device_driver_detach+0x14/0x20 unbind_store+0xaf/0xc0 drv_attr_store+0x21/0x50 sysfs_kf_write+0x4a/0x80 kernfs_fop_write_iter+0x169/0x220 vfs_write+0x293/0x560 ksys_write+0x72/0xf0 __x64_sys_write+0x19/0x30 x64_sys_call+0x2bf/0x2660 do_syscall_64+0x93/0xb60 ? __f_unlock_pos+0x15/0x20 ? __x64_sys_getdents64+0x9b/0x130 ? __pfx_filldir64+0x10/0x10 ? do_syscall_64+0x1a2/0xb60 ? clear_bhb_loop+0x30/0x80 ? clear_bhb_loop+0x30/0x80 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x76e292edd574 Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89 RSP: 002b:00007fffe247a828 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 000076e292edd574 RDX: 000000000000000c RSI: 00006267f6306063 RDI: 000000000000000b RBP: 000000000000000c R08: 000076e292fc4b20 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000202 R12: 00006267f6306063 R13: 000000000000000b R14: 00006267e6859c00 R15: 000076e29322a000 xe 0000:03:00.0: [drm] Xe device coredump has been deleted. Fixes: 01daccf74832 ("devcoredump : Serialize devcd_del work") Cc: Mukesh Ojha Cc: Greg Kroah-Hartman Cc: Johannes Berg Cc: Rafael J. Wysocki Cc: Danilo Krummrich Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Maarten Lankhorst Cc: Matthew Brost Acked-by: Mukesh Ojha Link: https://lore.kernel.org/r/20250723142416.1020423-1-dev@lankhorst.se Signed-off-by: Greg Kroah-Hartman --- drivers/base/devcoredump.c | 136 ++++++++++++++++++++++--------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 37faf6156d7c..55bdc7f5e59d 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -23,50 +23,46 @@ struct devcd_entry { void *data; size_t datalen; /* - * Here, mutex is required to serialize the calls to del_wk work between - * user/kernel space which happens when devcd is added with device_add() - * and that sends uevent to user space. User space reads the uevents, - * and calls to devcd_data_write() which try to modify the work which is - * not even initialized/queued from devcoredump. + * There are 2 races for which mutex is required. * + * The first race is between device creation and userspace writing to + * schedule immediately destruction. * + * This race is handled by arming the timer before device creation, but + * when device creation fails the timer still exists. * - * cpu0(X) cpu1(Y) + * To solve this, hold the mutex during device_add(), and set + * init_completed on success before releasing the mutex. * - * dev_coredump() uevent sent to user space - * device_add() ======================> user space process Y reads the - * uevents writes to devcd fd - * which results into writes to + * That way the timer will never fire until device_add() is called, + * it will do nothing if init_completed is not set. The timer is also + * cancelled in that case. * - * devcd_data_write() - * mod_delayed_work() - * try_to_grab_pending() - * timer_delete() - * debug_assert_init() - * INIT_DELAYED_WORK() - * schedule_delayed_work() - * - * - * Also, mutex alone would not be enough to avoid scheduling of - * del_wk work after it get flush from a call to devcd_free() - * mentioned as below. - * - * disabled_store() - * devcd_free() - * mutex_lock() devcd_data_write() - * flush_delayed_work() - * mutex_unlock() - * mutex_lock() - * mod_delayed_work() - * mutex_unlock() - * So, delete_work flag is required. + * The second race involves multiple parallel invocations of devcd_free(), + * add a deleted flag so only 1 can call the destructor. */ struct mutex mutex; - bool delete_work; + bool init_completed, deleted; struct module *owner; ssize_t (*read)(char *buffer, loff_t offset, size_t count, void *data, size_t datalen); void (*free)(void *data); + /* + * If nothing interferes and device_add() was returns success, + * del_wk will destroy the device after the timer fires. + * + * Multiple userspace processes can interfere in the working of the timer: + * - Writing to the coredump will reschedule the timer to run immediately, + * if still armed. + * + * This is handled by using "if (cancel_delayed_work()) { + * schedule_delayed_work() }", to prevent re-arming after having + * been previously fired. + * - Writing to /sys/class/devcoredump/disabled will destroy the + * coredump synchronously. + * This is handled by using disable_delayed_work_sync(), and then + * checking if deleted flag is set with &devcd->mutex held. + */ struct delayed_work del_wk; struct device *failing_dev; }; @@ -95,14 +91,27 @@ static void devcd_dev_release(struct device *dev) kfree(devcd); } +static void __devcd_del(struct devcd_entry *devcd) +{ + devcd->deleted = true; + device_del(&devcd->devcd_dev); + put_device(&devcd->devcd_dev); +} + static void devcd_del(struct work_struct *wk) { struct devcd_entry *devcd; + bool init_completed; devcd = container_of(wk, struct devcd_entry, del_wk.work); - device_del(&devcd->devcd_dev); - put_device(&devcd->devcd_dev); + /* devcd->mutex serializes against dev_coredumpm_timeout */ + mutex_lock(&devcd->mutex); + init_completed = devcd->init_completed; + mutex_unlock(&devcd->mutex); + + if (init_completed) + __devcd_del(devcd); } static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, @@ -122,12 +131,12 @@ static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct devcd_entry *devcd = dev_to_devcd(dev); - mutex_lock(&devcd->mutex); - if (!devcd->delete_work) { - devcd->delete_work = true; - mod_delayed_work(system_wq, &devcd->del_wk, 0); - } - mutex_unlock(&devcd->mutex); + /* + * Although it's tempting to use mod_delayed work here, + * that will cause a reschedule if the timer already fired. + */ + if (cancel_delayed_work(&devcd->del_wk)) + schedule_delayed_work(&devcd->del_wk, 0); return count; } @@ -151,11 +160,21 @@ static int devcd_free(struct device *dev, void *data) { struct devcd_entry *devcd = dev_to_devcd(dev); + /* + * To prevent a race with devcd_data_write(), disable work and + * complete manually instead. + * + * We cannot rely on the return value of + * disable_delayed_work_sync() here, because it might be in the + * middle of a cancel_delayed_work + schedule_delayed_work pair. + * + * devcd->mutex here guards against multiple parallel invocations + * of devcd_free(). + */ + disable_delayed_work_sync(&devcd->del_wk); mutex_lock(&devcd->mutex); - if (!devcd->delete_work) - devcd->delete_work = true; - - flush_delayed_work(&devcd->del_wk); + if (!devcd->deleted) + __devcd_del(devcd); mutex_unlock(&devcd->mutex); return 0; } @@ -179,12 +198,10 @@ static ssize_t disabled_show(const struct class *class, const struct class_attri * put_device() <- last reference * error = fn(dev, data) devcd_dev_release() * devcd_free(dev, data) kfree(devcd) - * mutex_lock(&devcd->mutex); * * * In the above diagram, it looks like disabled_store() would be racing with parallelly - * running devcd_del() and result in memory abort while acquiring devcd->mutex which - * is called after kfree of devcd memory after dropping its last reference with + * running devcd_del() and result in memory abort after dropping its last reference with * put_device(). However, this will not happens as fn(dev, data) runs * with its own reference to device via klist_node so it is not its last reference. * so, above situation would not occur. @@ -374,7 +391,7 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, devcd->read = read; devcd->free = free; devcd->failing_dev = get_device(dev); - devcd->delete_work = false; + devcd->deleted = false; mutex_init(&devcd->mutex); device_initialize(&devcd->devcd_dev); @@ -383,8 +400,14 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, atomic_inc_return(&devcd_count)); devcd->devcd_dev.class = &devcd_class; - mutex_lock(&devcd->mutex); dev_set_uevent_suppress(&devcd->devcd_dev, true); + + /* devcd->mutex prevents devcd_del() completing until init finishes */ + mutex_lock(&devcd->mutex); + devcd->init_completed = false; + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); + schedule_delayed_work(&devcd->del_wk, timeout); + if (device_add(&devcd->devcd_dev)) goto put_device; @@ -401,13 +424,20 @@ void dev_coredumpm_timeout(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); - INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, timeout); + + /* + * Safe to run devcd_del() now that we are done with devcd_dev. + * Alternatively we could have taken a ref on devcd_dev before + * dropping the lock. + */ + devcd->init_completed = true; mutex_unlock(&devcd->mutex); return; put_device: - put_device(&devcd->devcd_dev); mutex_unlock(&devcd->mutex); + cancel_delayed_work_sync(&devcd->del_wk); + put_device(&devcd->devcd_dev); + put_module: module_put(owner); free: From c7fbb8218b4ad35fec0bd2256d2b9c8d60331f33 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Thu, 16 Oct 2025 12:14:56 +0200 Subject: [PATCH 055/263] sysfs: check visibility before changing group attribute ownership Since commit 0c17270f9b92 ("net: sysfs: Implement is_visible for phys_(port_id, port_name, switch_id)"), __dev_change_net_namespace() can hit WARN_ON() when trying to change owner of a file that isn't visible. See the trace below: WARNING: CPU: 6 PID: 2938 at net/core/dev.c:12410 __dev_change_net_namespace+0xb89/0xc30 CPU: 6 UID: 0 PID: 2938 Comm: incusd Not tainted 6.17.1-1-mainline #1 PREEMPT(full) 4b783b4a638669fb644857f484487d17cb45ed1f Hardware name: Framework Laptop 13 (AMD Ryzen 7040Series)/FRANMDCP07, BIOS 03.07 02/19/2025 RIP: 0010:__dev_change_net_namespace+0xb89/0xc30 [...] Call Trace: ? if6_seq_show+0x30/0x50 do_setlink.isra.0+0xc7/0x1270 ? __nla_validate_parse+0x5c/0xcc0 ? security_capable+0x94/0x1a0 rtnl_newlink+0x858/0xc20 ? update_curr+0x8e/0x1c0 ? update_entity_lag+0x71/0x80 ? sched_balance_newidle+0x358/0x450 ? psi_task_switch+0x113/0x2a0 ? __pfx_rtnl_newlink+0x10/0x10 rtnetlink_rcv_msg+0x346/0x3e0 ? sched_clock+0x10/0x30 ? __pfx_rtnetlink_rcv_msg+0x10/0x10 netlink_rcv_skb+0x59/0x110 netlink_unicast+0x285/0x3c0 ? __alloc_skb+0xdb/0x1a0 netlink_sendmsg+0x20d/0x430 ____sys_sendmsg+0x39f/0x3d0 ? import_iovec+0x2f/0x40 ___sys_sendmsg+0x99/0xe0 __sys_sendmsg+0x8a/0xf0 do_syscall_64+0x81/0x970 ? __sys_bind+0xe3/0x110 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? sock_alloc_file+0x63/0xc0 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? alloc_fd+0x12e/0x190 ? put_unused_fd+0x2a/0x70 ? do_sys_openat2+0xa2/0xe0 ? syscall_exit_work+0x143/0x1b0 ? do_syscall_64+0x244/0x970 ? exc_page_fault+0x7e/0x1a0 entry_SYSCALL_64_after_hwframe+0x76/0x7e [...] Fix this by checking is_visible() before trying to touch the attribute. Fixes: 303a42769c4c ("sysfs: add sysfs_group{s}_change_owner()") Fixes: 0c17270f9b92 ("net: sysfs: Implement is_visible for phys_(port_id, port_name, switch_id)") Reported-by: Cynthia Closes: https://lore.kernel.org/netdev/01070199e22de7f8-28f711ab-d3f1-46d9-b9a0-048ab05eb09b-000000@eu-central-1.amazonses.com/ Signed-off-by: Fernando Fernandez Mancera Reviewed-by: Jakub Kicinski Link: https://lore.kernel.org/r/20251016101456.4087-1-fmancera@suse.de Signed-off-by: Greg Kroah-Hartman --- fs/sysfs/group.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 2d78e94072a0..e142bac4f9f8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -498,17 +498,26 @@ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, } EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj); -static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, +static int sysfs_group_attrs_change_owner(struct kobject *kobj, + struct kernfs_node *grp_kn, const struct attribute_group *grp, struct iattr *newattrs) { struct kernfs_node *kn; - int error; + int error, i; + umode_t mode; if (grp->attrs) { struct attribute *const *attr; - for (attr = grp->attrs; *attr; attr++) { + for (i = 0, attr = grp->attrs; *attr; i++, attr++) { + if (grp->is_visible) { + mode = grp->is_visible(kobj, *attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*attr)->name); if (!kn) return -ENOENT; @@ -523,7 +532,14 @@ static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn, if (grp->bin_attrs) { const struct bin_attribute *const *bin_attr; - for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { + for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) { + if (grp->is_bin_visible) { + mode = grp->is_bin_visible(kobj, *bin_attr, i); + if (mode & SYSFS_GROUP_INVISIBLE) + break; + if (!mode) + continue; + } kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name); if (!kn) return -ENOENT; @@ -573,7 +589,7 @@ int sysfs_group_change_owner(struct kobject *kobj, error = kernfs_setattr(grp_kn, &newattrs); if (!error) - error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs); + error = sysfs_group_attrs_change_owner(kobj, grp_kn, grp, &newattrs); kernfs_put(grp_kn); From 4eabd0d8791eaf9a7b114ccbf56eb488aefe7b1f Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Fri, 17 Oct 2025 11:29:22 +0100 Subject: [PATCH 056/263] drm/panthor: Fix kernel panic on partial unmap of a GPU VA region This commit address a kernel panic issue that can happen if Userspace tries to partially unmap a GPU virtual region (aka drm_gpuva). The VM_BIND interface allows partial unmapping of a BO. Panthor driver pre-allocates memory for the new drm_gpuva structures that would be needed for the map/unmap operation, done using drm_gpuvm layer. It expected that only one new drm_gpuva would be needed on umap but a partial unmap can require 2 new drm_gpuva and that's why it ended up doing a NULL pointer dereference causing a kernel panic. Following dump was seen when partial unmap was exercised. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000078 Mem abort info: ESR = 0x0000000096000046 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x06: level 2 translation fault Data abort info: ISV = 0, ISS = 0x00000046, ISS2 = 0x00000000 CM = 0, WnR = 1, TnD = 0, TagAccess = 0 GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000088a863000 [000000000000078] pgd=080000088a842003, p4d=080000088a842003, pud=0800000884bf5003, pmd=0000000000000000 Internal error: Oops: 0000000096000046 [#1] PREEMPT SMP pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : panthor_gpuva_sm_step_remap+0xe4/0x330 [panthor] lr : panthor_gpuva_sm_step_remap+0x6c/0x330 [panthor] sp : ffff800085d43970 x29: ffff800085d43970 x28: ffff00080363e440 x27: ffff0008090c6000 x26: 0000000000000030 x25: ffff800085d439f8 x24: ffff00080d402000 x23: ffff800085d43b60 x22: ffff800085d439e0 x21: ffff00080abdb180 x20: 0000000000000000 x19: 0000000000000000 x18: 0000000000000010 x17: 6e656c202c303030 x16: 3666666666646466 x15: 393d61766f69202c x14: 312d3d7361203a70 x13: 303030323d6e656c x12: ffff80008324bf58 x11: 0000000000000003 x10: 0000000000000002 x9 : ffff8000801a6a9c x8 : ffff00080360b300 x7 : 0000000000000000 x6 : 000000088aa35fc7 x5 : fff1000080000000 x4 : ffff8000842ddd30 x3 : 0000000000000001 x2 : 0000000100000000 x1 : 0000000000000001 x0 : 0000000000000078 Call trace: panthor_gpuva_sm_step_remap+0xe4/0x330 [panthor] op_remap_cb.isra.22+0x50/0x80 __drm_gpuvm_sm_unmap+0x10c/0x1c8 drm_gpuvm_sm_unmap+0x40/0x60 panthor_vm_exec_op+0xb4/0x3d0 [panthor] panthor_vm_bind_exec_sync_op+0x154/0x278 [panthor] panthor_ioctl_vm_bind+0x160/0x4a0 [panthor] drm_ioctl_kernel+0xbc/0x138 drm_ioctl+0x240/0x500 __arm64_sys_ioctl+0xb0/0xf8 invoke_syscall+0x4c/0x110 el0_svc_common.constprop.1+0x98/0xf8 do_el0_svc+0x24/0x38 el0_svc+0x40/0xf8 el0t_64_sync_handler+0xa0/0xc8 el0t_64_sync+0x174/0x178 Signed-off-by: Akash Goel Reviewed-by: Boris Brezillon Reviewed-by: Liviu Dudau Fixes: 647810ec2476 ("drm/panthor: Add the MMU/VM logical block") Reviewed-by: Steven Price Signed-off-by: Steven Price Link: https://lore.kernel.org/r/20251017102922.670084-1-akash.goel@arm.com --- drivers/gpu/drm/panthor/panthor_mmu.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index 6dec4354e378..7870e7dbaa5d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1175,10 +1175,14 @@ panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) break; case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: - /* Partial unmaps might trigger a remap with either a prev or a next VA, - * but not both. + /* Two VMAs can be needed for an unmap, as an unmap can happen + * in the middle of a drm_gpuva, requiring a remap with both + * prev & next VA. Or an unmap can span more than one drm_gpuva + * where the first and last ones are covered partially, requring + * a remap for the first with a prev VA and remap for the last + * with a next VA. */ - vma_count = 1; + vma_count = 2; break; default: From cfec502b3d091ff7c24df6ccf8079470584315a0 Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Thu, 16 Oct 2025 15:31:44 +0200 Subject: [PATCH 057/263] rust: device: fix device context of Device::parent() Regardless of the DeviceContext of a device, we can't give any guarantees about the DeviceContext of its parent device. This is very subtle, since it's only caused by a simple typo, i.e. Self::from_raw(parent) which preserves the DeviceContext in this case, vs. Device::from_raw(parent) which discards the DeviceContext. (I should have noticed it doing the correct thing in auxiliary::Device subsequently, but somehow missed it.) Hence, fix both Device::parent() and auxiliary::Device::parent(). Cc: stable@vger.kernel.org Fixes: a4c9f71e3440 ("rust: device: implement Device::parent()") Reviewed-by: Alice Ryhl Reviewed-by: Alexandre Courbot Acked-by: Greg Kroah-Hartman Signed-off-by: Danilo Krummrich --- rust/kernel/auxiliary.rs | 8 +------- rust/kernel/device.rs | 4 ++-- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/rust/kernel/auxiliary.rs b/rust/kernel/auxiliary.rs index e11848bbf206..7a3b0b9c418e 100644 --- a/rust/kernel/auxiliary.rs +++ b/rust/kernel/auxiliary.rs @@ -217,13 +217,7 @@ pub fn id(&self) -> u32 { /// Returns a reference to the parent [`device::Device`], if any. pub fn parent(&self) -> Option<&device::Device> { - let ptr: *const Self = self; - // CAST: `Device` types are transparent to each other. - let ptr: *const Device = ptr.cast(); - // SAFETY: `ptr` was derived from `&self`. - let this = unsafe { &*ptr }; - - this.as_ref().parent() + self.as_ref().parent() } } diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 1321e6f0b53c..a849b7dde2fd 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -251,7 +251,7 @@ pub(crate) fn as_raw(&self) -> *mut bindings::device { /// Returns a reference to the parent device, if any. #[cfg_attr(not(CONFIG_AUXILIARY_BUS), expect(dead_code))] - pub(crate) fn parent(&self) -> Option<&Self> { + pub(crate) fn parent(&self) -> Option<&Device> { // SAFETY: // - By the type invariant `self.as_raw()` is always valid. // - The parent device is only ever set at device creation. @@ -264,7 +264,7 @@ pub(crate) fn parent(&self) -> Option<&Self> { // - Since `parent` is not NULL, it must be a valid pointer to a `struct device`. // - `parent` is valid for the lifetime of `self`, since a `struct device` holds a // reference count of its parent. - Some(unsafe { Self::from_raw(parent) }) + Some(unsafe { Device::from_raw(parent) }) } } From ca525d53f994d45c8140968b571372c45f555ac1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Fri, 17 Oct 2025 21:30:05 -0600 Subject: [PATCH 058/263] RISC-V: Define pgprot_dmacoherent() for non-coherent devices The pgprot_dmacoherent() is used when allocating memory for non-coherent devices and by default pgprot_dmacoherent() is same as pgprot_noncached() unless architecture overrides it. Currently, there is no pgprot_dmacoherent() definition for RISC-V hence non-coherent device memory is being mapped as IO thereby making CPU access to such memory slow. Define pgprot_dmacoherent() to be same as pgprot_writecombine() for RISC-V so that CPU access non-coherent device memory as NOCACHE which is better than accessing it as IO. Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support") Signed-off-by: Anup Patel Tested-by: Han Gao Tested-by: Guo Ren (Alibaba DAMO Academy) Link: https://lore.kernel.org/r/20250820152316.1012757-1-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 29e994a9afb6..5a08eb5fe99f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -654,6 +654,8 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +#define pgprot_dmacoherent pgprot_writecombine + /* * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in From e7b969cbe302d49032d4c2bb36c57c9c623ebfdc Mon Sep 17 00:00:00 2001 From: Sunil V L Date: Mon, 13 Oct 2025 23:49:47 +0530 Subject: [PATCH 059/263] ACPI: RIMT: Fix unused function warnings when CONFIG_IOMMU_API is disabled When CONFIG_IOMMU_API is disabled, some functions defined outside its conditional scope become unused, triggering compiler warnings reported by the kernel test robot. Move these function definitions inside the #ifdef CONFIG_IOMMU_API block to prevent unused function warnings when the configuration is disabled. Fixes: 8f7729552582 ("ACPI: RISC-V: Add support for RIMT") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509280031.8Sjkr4bh-lkp@intel.com/ Signed-off-by: Sunil V L Link: https://lore.kernel.org/r/20251013181947.261759-1-sunilvl@ventanamicro.com --- drivers/acpi/riscv/rimt.c | 122 +++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 61 deletions(-) diff --git a/drivers/acpi/riscv/rimt.c b/drivers/acpi/riscv/rimt.c index 683fcfe35c31..7f423405e5ef 100644 --- a/drivers/acpi/riscv/rimt.c +++ b/drivers/acpi/riscv/rimt.c @@ -61,30 +61,6 @@ static int rimt_set_fwnode(struct acpi_rimt_node *rimt_node, return 0; } -/** - * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node - * - * @node: RIMT table node to be looked-up - * - * Returns: fwnode_handle pointer on success, NULL on failure - */ -static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) -{ - struct fwnode_handle *fwnode = NULL; - struct rimt_fwnode *curr; - - spin_lock(&rimt_fwnode_lock); - list_for_each_entry(curr, &rimt_fwnode_list, list) { - if (curr->rimt_node == node) { - fwnode = curr->fwnode; - break; - } - } - spin_unlock(&rimt_fwnode_lock); - - return fwnode; -} - static acpi_status rimt_match_node_callback(struct acpi_rimt_node *node, void *context) { @@ -202,6 +178,67 @@ static struct acpi_rimt_node *rimt_scan_node(enum acpi_rimt_node_type type, return NULL; } +/* + * RISC-V supports IOMMU as a PCI device or a platform device. + * When it is a platform device, there should be a namespace device as + * well along with RIMT. To create the link between RIMT information and + * the platform device, the IOMMU driver should register itself with the + * RIMT module. This is true for PCI based IOMMU as well. + */ +int rimt_iommu_register(struct device *dev) +{ + struct fwnode_handle *rimt_fwnode; + struct acpi_rimt_node *node; + + node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); + if (!node) { + pr_err("Could not find IOMMU node in RIMT\n"); + return -ENODEV; + } + + if (dev_is_pci(dev)) { + rimt_fwnode = acpi_alloc_fwnode_static(); + if (!rimt_fwnode) + return -ENOMEM; + + rimt_fwnode->dev = dev; + if (!dev->fwnode) + dev->fwnode = rimt_fwnode; + + rimt_set_fwnode(node, rimt_fwnode); + } else { + rimt_set_fwnode(node, dev->fwnode); + } + + return 0; +} + +#ifdef CONFIG_IOMMU_API + +/** + * rimt_get_fwnode() - Retrieve fwnode associated with an RIMT node + * + * @node: RIMT table node to be looked-up + * + * Returns: fwnode_handle pointer on success, NULL on failure + */ +static struct fwnode_handle *rimt_get_fwnode(struct acpi_rimt_node *node) +{ + struct fwnode_handle *fwnode = NULL; + struct rimt_fwnode *curr; + + spin_lock(&rimt_fwnode_lock); + list_for_each_entry(curr, &rimt_fwnode_list, list) { + if (curr->rimt_node == node) { + fwnode = curr->fwnode; + break; + } + } + spin_unlock(&rimt_fwnode_lock); + + return fwnode; +} + static bool rimt_pcie_rc_supports_ats(struct acpi_rimt_node *node) { struct acpi_rimt_pcie_rc *pci_rc; @@ -290,43 +327,6 @@ static struct acpi_rimt_node *rimt_node_get_id(struct acpi_rimt_node *node, return NULL; } -/* - * RISC-V supports IOMMU as a PCI device or a platform device. - * When it is a platform device, there should be a namespace device as - * well along with RIMT. To create the link between RIMT information and - * the platform device, the IOMMU driver should register itself with the - * RIMT module. This is true for PCI based IOMMU as well. - */ -int rimt_iommu_register(struct device *dev) -{ - struct fwnode_handle *rimt_fwnode; - struct acpi_rimt_node *node; - - node = rimt_scan_node(ACPI_RIMT_NODE_TYPE_IOMMU, dev); - if (!node) { - pr_err("Could not find IOMMU node in RIMT\n"); - return -ENODEV; - } - - if (dev_is_pci(dev)) { - rimt_fwnode = acpi_alloc_fwnode_static(); - if (!rimt_fwnode) - return -ENOMEM; - - rimt_fwnode->dev = dev; - if (!dev->fwnode) - dev->fwnode = rimt_fwnode; - - rimt_set_fwnode(node, rimt_fwnode); - } else { - rimt_set_fwnode(node, dev->fwnode); - } - - return 0; -} - -#ifdef CONFIG_IOMMU_API - static struct acpi_rimt_node *rimt_node_map_id(struct acpi_rimt_node *node, u32 id_in, u32 *id_out, u8 type_mask) From 223bfc4d403c5e4841f9d2b5be88a6e236942e4e Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 17:32:05 -0700 Subject: [PATCH 060/263] riscv: Register IPI IRQs with unique names This allows different IPIs to be distinguished in tracing output. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251016003244.3910332-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/smp.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index e650dec44817..5ed5095320e6 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -40,6 +40,17 @@ enum ipi_message_type { IPI_MAX }; +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", + [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", +}; + unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; @@ -199,7 +210,7 @@ void riscv_ipi_set_virq_range(int virq, int nr) /* Request IPIs */ for (i = 0; i < nr_ipi; i++) { err = request_percpu_irq(ipi_virq_base + i, handle_IPI, - "IPI", &ipi_dummy_dev); + ipi_names[i], &ipi_dummy_dev); WARN_ON(err); ipi_desc[i] = irq_to_desc(ipi_virq_base + i); @@ -210,17 +221,6 @@ void riscv_ipi_set_virq_range(int virq, int nr) riscv_ipi_enable(); } -static const char * const ipi_names[] = { - [IPI_RESCHEDULE] = "Rescheduling interrupts", - [IPI_CALL_FUNC] = "Function call interrupts", - [IPI_CPU_STOP] = "CPU stop interrupts", - [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", - [IPI_IRQ_WORK] = "IRQ work interrupts", - [IPI_TIMER] = "Timer broadcast interrupts", - [IPI_CPU_BACKTRACE] = "CPU backtrace interrupts", - [IPI_KGDB_ROUNDUP] = "KGDB roundup interrupts", -}; - void show_ipi_stats(struct seq_file *p, int prec) { unsigned int cpu, i; From 5898fc01ff344075e4332aa9abeb0841c85e7e51 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 16:33:24 -0700 Subject: [PATCH 061/263] riscv: mm: Define MAX_POSSIBLE_PHYSMEM_BITS for zsmalloc This definition is used by zsmalloc to optimize memory allocation. On riscv64, it is the same as MAX_PHYSMEM_BITS from asm/sparsemem.h, but that definition depends on CONFIG_SPARSEMEM. The correct definition is already provided for riscv32. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251015233327.3885003-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/pgtable-64.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 1018d2216901..6e789fa58514 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -69,6 +69,8 @@ typedef struct { #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) +#define MAX_POSSIBLE_PHYSMEM_BITS 56 + /* * rv64 PTE format: * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 From 768e054de01bef8701c24ec49309e57e0167af44 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 15 Oct 2025 15:56:00 -0700 Subject: [PATCH 062/263] riscv: Remove the PER_CPU_OFFSET_SHIFT macro __per_cpu_offset is an array of unsigned long, so we can reuse the existing RISCV_LGPTR macro. Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20251015225604.3860409-1-samuel.holland@sifive.com Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/asm.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 8bd2a11382a3..ac28066bb564 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -84,15 +84,9 @@ .endm #ifdef CONFIG_SMP -#ifdef CONFIG_32BIT -#define PER_CPU_OFFSET_SHIFT 2 -#else -#define PER_CPU_OFFSET_SHIFT 3 -#endif - .macro asm_per_cpu dst sym tmp lw \tmp, TASK_TI_CPU_NUM(tp) - slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT + slli \tmp, \tmp, RISCV_LGPTR la \dst, __per_cpu_offset add \dst, \dst, \tmp REG_L \tmp, 0(\dst) From d2721bb165b3ee00dd23525885381af07fec852a Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Tue, 14 Oct 2025 22:00:09 +0530 Subject: [PATCH 063/263] RISC-V: Don't print details of CPUs disabled in DT Early boot stages may disable CPU DT nodes for unavailable CPUs based on SKU, pinstraps, eFuse, etc. Currently, the riscv_early_of_processor_hartid() prints details of a CPU if it is disabled in DT which has no value and gives a false impression to the users that there some issue with the CPU. Fixes: e3d794d555cd ("riscv: treat cpu devicetree nodes without status as enabled") Signed-off-by: Anup Patel Reviewed-by: Andrew Jones Reviewed-by: Conor Dooley Link: https://lore.kernel.org/r/20251014163009.182381-1-apatel@ventanamicro.com Signed-off-by: Paul Walmsley --- arch/riscv/kernel/cpu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f6b13e9f5e6c..3dbc8cc557dd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -62,10 +62,8 @@ int __init riscv_early_of_processor_hartid(struct device_node *node, unsigned lo return -ENODEV; } - if (!of_device_is_available(node)) { - pr_info("CPU with hartid=%lu is not available\n", *hart); + if (!of_device_is_available(node)) return -ENODEV; - } if (of_property_read_string(node, "riscv,isa-base", &isa)) goto old_interface; From 492c513ec6de1ce51b5f033bd6c708e4b8e46ae4 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Tue, 14 Oct 2025 17:25:26 -0600 Subject: [PATCH 064/263] riscv: add a forward declaration for cpuinfo_op Add a forward declaration for cpuinfo_op to resolve a sparse warning. Link: https://lore.kernel.org/r/b831f349-5d0c-f7ac-8362-acb20bc6221a@kernel.org Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index fbd0e4306c93..62837fa981e8 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -31,6 +31,8 @@ struct riscv_isainfo { DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +extern const struct seq_operations cpuinfo_op; + /* Per-cpu ISA extensions. */ extern struct riscv_isainfo hart_isa[NR_CPUS]; From 5d15d2ad36b0f7afab83ca9fc8a2a6e60cbe54c4 Mon Sep 17 00:00:00 2001 From: Jingwei Wang Date: Mon, 11 Aug 2025 22:20:06 +0800 Subject: [PATCH 065/263] riscv: hwprobe: Fix stale vDSO data for late-initialized keys at boot The hwprobe vDSO data for some keys, like MISALIGNED_VECTOR_PERF, is determined by an asynchronous kthread. This can create a race condition where the kthread finishes after the vDSO data has already been populated, causing userspace to read stale values. To fix this race, a new 'ready' flag is added to the vDSO data, initialized to 'false' during arch_initcall_sync. This flag is checked by both the vDSO's user-space code and the riscv_hwprobe syscall. The syscall serves as a one-time gate, using a completion to wait for any pending probes before populating the data and setting the flag to 'true', thus ensuring userspace reads fresh values on its first request. Reported-by: Tsukasa OI Closes: https://lore.kernel.org/linux-riscv/760d637b-b13b-4518-b6bf-883d55d44e7f@irq.a4lg.com/ Fixes: e7c9d66e313b ("RISC-V: Report vector unaligned access speed hwprobe") Cc: Palmer Dabbelt Cc: Alexandre Ghiti Cc: Olof Johansson Cc: stable@vger.kernel.org Reviewed-by: Alexandre Ghiti Co-developed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt Signed-off-by: Jingwei Wang Link: https://lore.kernel.org/r/20250811142035.105820-1-wangjingwei@iscas.ac.cn [pjw@kernel.org: fix checkpatch issues] Signed-off-by: Paul Walmsley --- arch/riscv/include/asm/hwprobe.h | 7 ++ arch/riscv/include/asm/vdso/arch_data.h | 6 ++ arch/riscv/kernel/sys_hwprobe.c | 74 ++++++++++++++++++---- arch/riscv/kernel/unaligned_access_speed.c | 9 ++- arch/riscv/kernel/vdso/hwprobe.c | 2 +- 5 files changed, 81 insertions(+), 17 deletions(-) diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index 948d2b34e94e..58f8dda73259 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -42,4 +42,11 @@ static inline bool riscv_hwprobe_pair_cmp(struct riscv_hwprobe *pair, return pair->value == other_pair->value; } +#ifdef CONFIG_MMU +void riscv_hwprobe_register_async_probe(void); +void riscv_hwprobe_complete_async_probe(void); +#else +static inline void riscv_hwprobe_register_async_probe(void) {} +static inline void riscv_hwprobe_complete_async_probe(void) {} +#endif #endif diff --git a/arch/riscv/include/asm/vdso/arch_data.h b/arch/riscv/include/asm/vdso/arch_data.h index da57a3786f7a..88b37af55175 100644 --- a/arch/riscv/include/asm/vdso/arch_data.h +++ b/arch/riscv/include/asm/vdso/arch_data.h @@ -12,6 +12,12 @@ struct vdso_arch_data { /* Boolean indicating all CPUs have the same static hwprobe values. */ __u8 homogeneous_cpus; + + /* + * A gate to check and see if the hwprobe data is actually ready, as + * probing is deferred to avoid boot slowdowns. + */ + __u8 ready; }; #endif /* __RISCV_ASM_VDSO_ARCH_DATA_H */ diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 000f4451a9d8..bc87bb9725fd 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -5,6 +5,9 @@ * more details. */ #include +#include +#include +#include #include #include #include @@ -454,28 +457,32 @@ static int hwprobe_get_cpus(struct riscv_hwprobe __user *pairs, return 0; } -static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, - size_t pair_count, size_t cpusetsize, - unsigned long __user *cpus_user, - unsigned int flags) -{ - if (flags & RISCV_HWPROBE_WHICH_CPUS) - return hwprobe_get_cpus(pairs, pair_count, cpusetsize, - cpus_user, flags); - - return hwprobe_get_values(pairs, pair_count, cpusetsize, - cpus_user, flags); -} - #ifdef CONFIG_MMU -static int __init init_hwprobe_vdso_data(void) +static DECLARE_COMPLETION(boot_probes_done); +static atomic_t pending_boot_probes = ATOMIC_INIT(1); + +void riscv_hwprobe_register_async_probe(void) +{ + atomic_inc(&pending_boot_probes); +} + +void riscv_hwprobe_complete_async_probe(void) +{ + if (atomic_dec_and_test(&pending_boot_probes)) + complete(&boot_probes_done); +} + +static int complete_hwprobe_vdso_data(void) { struct vdso_arch_data *avd = vdso_k_arch_data; u64 id_bitsmash = 0; struct riscv_hwprobe pair; int key; + if (unlikely(!atomic_dec_and_test(&pending_boot_probes))) + wait_for_completion(&boot_probes_done); + /* * Initialize vDSO data with the answers for the "all CPUs" case, to * save a syscall in the common case. @@ -503,13 +510,52 @@ static int __init init_hwprobe_vdso_data(void) * vDSO should defer to the kernel for exotic cpu masks. */ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + + /* + * Make sure all the VDSO values are visible before we look at them. + * This pairs with the implicit "no speculativly visible accesses" + * barrier in the VDSO hwprobe code. + */ + smp_wmb(); + avd->ready = true; + return 0; +} + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_arch_data *avd = vdso_k_arch_data; + + /* + * Prevent the vDSO cached values from being used, as they're not ready + * yet. + */ + avd->ready = false; return 0; } arch_initcall_sync(init_hwprobe_vdso_data); +#else + +static int complete_hwprobe_vdso_data(void) { return 0; } + #endif /* CONFIG_MMU */ +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpusetsize, + unsigned long __user *cpus_user, + unsigned int flags) +{ + DO_ONCE_SLEEPABLE(complete_hwprobe_vdso_data); + + if (flags & RISCV_HWPROBE_WHICH_CPUS) + return hwprobe_get_cpus(pairs, pair_count, cpusetsize, + cpus_user, flags); + + return hwprobe_get_values(pairs, pair_count, cpusetsize, + cpus_user, flags); +} + SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, size_t, pair_count, size_t, cpusetsize, unsigned long __user *, cpus, unsigned int, flags) diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index ae2068425fbc..70b5e6927620 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -379,6 +379,7 @@ static void check_vector_unaligned_access(struct work_struct *work __always_unus static int __init vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { schedule_on_each_cpu(check_vector_unaligned_access); + riscv_hwprobe_complete_async_probe(); return 0; } @@ -473,8 +474,12 @@ static int __init check_unaligned_access_all_cpus(void) per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param; } else if (!check_vector_unaligned_access_emulated_all_cpus() && IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { - kthread_run(vec_check_unaligned_access_speed_all_cpus, - NULL, "vec_check_unaligned_access_speed_all_cpus"); + riscv_hwprobe_register_async_probe(); + if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"))) { + pr_warn("Failed to create vec_unalign_check kthread\n"); + riscv_hwprobe_complete_async_probe(); + } } /* diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c index 2ddeba6c68dd..8f45500d0a6e 100644 --- a/arch/riscv/kernel/vdso/hwprobe.c +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -27,7 +27,7 @@ static int riscv_vdso_get_values(struct riscv_hwprobe *pairs, size_t pair_count, * homogeneous, then this function can handle requests for arbitrary * masks. */ - if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + if (flags != 0 || (!all_cpus && !avd->homogeneous_cpus) || unlikely(!avd->ready)) return riscv_hwprobe(pairs, pair_count, cpusetsize, cpus, flags); /* This is something we can handle, fill out the pairs. */ From 2dc99ea2727640b2fe12f9aa0e38ea2fc3cbb92d Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 18 Oct 2025 00:31:11 -0600 Subject: [PATCH 066/263] riscv: cpufeature: avoid uninitialized variable in has_thead_homogeneous_vlenb() In has_thead_homogeneous_vlenb(), smatch detected that the vlenb variable could be used while uninitialized. It appears that this could happen if no CPUs described in DT have the "thead,vlenb" property. Fix by initializing vlenb to 0, which will keep thead_vlenb_of set to 0 (as it was statically initialized). This in turn will cause riscv_v_setup_vsize() to fall back to CSR probing - the desired result if thead,vlenb isn't provided in the DT data. While here, fix a nearby comment typo. Cc: stable@vger.kernel.org Cc: Charlie Jenkins Fixes: 377be47f90e41 ("riscv: vector: Use vlenb from DT for thead") Signed-off-by: Paul Walmsley Link: https://lore.kernel.org/r/22674afb-2fe8-2a83-1818-4c37bd554579@kernel.org --- arch/riscv/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 67b59699357d..72ca768f4e91 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -932,9 +932,9 @@ static int has_thead_homogeneous_vlenb(void) { int cpu; u32 prev_vlenb = 0; - u32 vlenb; + u32 vlenb = 0; - /* Ignore thead,vlenb property if xtheavector is not enabled in the kernel */ + /* Ignore thead,vlenb property if xtheadvector is not enabled in the kernel */ if (!IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR)) return 0; From b7776a802f2f80139f96530a489dd00fd7089eda Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sat, 18 Oct 2025 09:32:12 -0600 Subject: [PATCH 067/263] riscv: hwprobe: avoid uninitialized variable use in hwprobe_arch_id() Resolve this smatch warning: arch/riscv/kernel/sys_hwprobe.c:50 hwprobe_arch_id() error: uninitialized symbol 'cpu_id'. This could happen if hwprobe_arch_id() was called with a key ID of something other than MVENDORID, MIMPID, and MARCHID. This does not happen in the current codebase. The only caller of hwprobe_arch_id() is a function that only passes one of those three key IDs. For the sake of reducing static analyzer warning noise, and in the unlikely event that hwprobe_arch_id() is someday called with some other key ID, validate hwprobe_arch_id()'s input to ensure that 'cpu_id' is always initialized before use. Fixes: ea3de9ce8aa280 ("RISC-V: Add a syscall for HW probing") Cc: Evan Green Signed-off-by: Paul Walmsley Link: https://lore.kernel.org/r/cf5a13ec-19d0-9862-059b-943f36107bf3@kernel.org --- arch/riscv/kernel/sys_hwprobe.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bc87bb9725fd..199d13f86f31 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -31,6 +31,11 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, bool first = true; int cpu; + if (pair->key != RISCV_HWPROBE_KEY_MVENDORID && + pair->key != RISCV_HWPROBE_KEY_MIMPID && + pair->key != RISCV_HWPROBE_KEY_MARCHID) + goto out; + for_each_cpu(cpu, cpus) { u64 cpu_id; @@ -61,6 +66,7 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair, } } +out: pair->value = id; } From 77e67d5daaf155f7d0f99f4e797c4842169ec19e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 23 Sep 2025 14:20:16 +0300 Subject: [PATCH 068/263] wifi: iwlwifi: fix potential use after free in iwl_mld_remove_link() This code frees "link" by calling kfree_rcu(link, rcu_head) and then it dereferences "link" to get the "link->fw_id". Save the "link->fw_id" first to avoid a potential use after free. Fixes: d1e879ec600f ("wifi: iwlwifi: add iwlmld sub-driver") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/aNKCcKlbSkkS4_gO@stanley.mountain Signed-off-by: Miri Korenblit --- drivers/net/wireless/intel/iwlwifi/mld/link.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/link.c b/drivers/net/wireless/intel/iwlwifi/mld/link.c index 782fc41aa1c3..960dcd208f00 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/link.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/link.c @@ -501,6 +501,7 @@ void iwl_mld_remove_link(struct iwl_mld *mld, struct iwl_mld_vif *mld_vif = iwl_mld_vif_from_mac80211(bss_conf->vif); struct iwl_mld_link *link = iwl_mld_link_from_mac80211(bss_conf); bool is_deflink = link == &mld_vif->deflink; + u8 fw_id = link->fw_id; if (WARN_ON(!link || link->active)) return; @@ -513,10 +514,10 @@ void iwl_mld_remove_link(struct iwl_mld *mld, RCU_INIT_POINTER(mld_vif->link[bss_conf->link_id], NULL); - if (WARN_ON(link->fw_id >= mld->fw->ucode_capa.num_links)) + if (WARN_ON(fw_id >= mld->fw->ucode_capa.num_links)) return; - RCU_INIT_POINTER(mld->fw_id_to_bss_conf[link->fw_id], NULL); + RCU_INIT_POINTER(mld->fw_id_to_bss_conf[fw_id], NULL); } void iwl_mld_handle_missed_beacon_notif(struct iwl_mld *mld, From 41de7440e6a00b8e70a068c50e3fba2f56302e8a Mon Sep 17 00:00:00 2001 From: Alexis Czezar Torreno Date: Wed, 1 Oct 2025 08:37:07 +0800 Subject: [PATCH 069/263] hwmon: (pmbus/max34440) Update adpm12160 coeff due to latest FW adpm12160 is a dc-dc power module. The firmware was updated and the coeeficients in the pmbus_driver_info needs to be updated. Since the part has not yet released with older FW, this permanent change to reflect the latest should be ok. Signed-off-by: Alexis Czezar Torreno Link: https://lore.kernel.org/r/20251001-hwmon-next-v1-1-f8ca6a648203@analog.com Fixes: 629cf8f6c23a ("hwmon: (pmbus/max34440) Add support for ADPM12160") Cc: stable@vger.kernel.org # v6.16+ Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/max34440.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/pmbus/max34440.c b/drivers/hwmon/pmbus/max34440.c index 56834d26f8ef..ef981ed97da8 100644 --- a/drivers/hwmon/pmbus/max34440.c +++ b/drivers/hwmon/pmbus/max34440.c @@ -336,18 +336,18 @@ static struct pmbus_driver_info max34440_info[] = { .format[PSC_CURRENT_IN] = direct, .format[PSC_CURRENT_OUT] = direct, .format[PSC_TEMPERATURE] = direct, - .m[PSC_VOLTAGE_IN] = 1, + .m[PSC_VOLTAGE_IN] = 125, .b[PSC_VOLTAGE_IN] = 0, .R[PSC_VOLTAGE_IN] = 0, - .m[PSC_VOLTAGE_OUT] = 1, + .m[PSC_VOLTAGE_OUT] = 125, .b[PSC_VOLTAGE_OUT] = 0, .R[PSC_VOLTAGE_OUT] = 0, - .m[PSC_CURRENT_IN] = 1, + .m[PSC_CURRENT_IN] = 250, .b[PSC_CURRENT_IN] = 0, - .R[PSC_CURRENT_IN] = 2, - .m[PSC_CURRENT_OUT] = 1, + .R[PSC_CURRENT_IN] = -1, + .m[PSC_CURRENT_OUT] = 250, .b[PSC_CURRENT_OUT] = 0, - .R[PSC_CURRENT_OUT] = 2, + .R[PSC_CURRENT_OUT] = -1, .m[PSC_TEMPERATURE] = 1, .b[PSC_TEMPERATURE] = 0, .R[PSC_TEMPERATURE] = 2, From ab0fd09d25e1d706e1ffc240f5cf66dcc89eeb49 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:43:59 -0700 Subject: [PATCH 070/263] hwmon: (gpd-fan) Fix return value when platform_get_resource() fails When platform_get_resource() fails it returns NULL and not an error pointer, accordingly change the error handling. Fixes: 0ab88e239439 ("hwmon: add GPD devices sensor driver") Signed-off-by: Harshit Mogalapalli Link: https://lore.kernel.org/r/20251010204359.94300-1-harshit.m.mogalapalli@oracle.com Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index 644dc3ca9df7..eebe39ef9677 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -615,8 +615,8 @@ static int gpd_fan_probe(struct platform_device *pdev) const struct device *hwdev; res = platform_get_resource(pdev, IORESOURCE_IO, 0); - if (IS_ERR(res)) - return dev_err_probe(dev, PTR_ERR(res), + if (!res) + return dev_err_probe(dev, -EINVAL, "Failed to get platform resource\n"); region = devm_request_region(dev, res->start, From 72ac14851012d45dcbb9d3533e372e33001b873e Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Fri, 10 Oct 2025 13:44:46 -0700 Subject: [PATCH 071/263] hwmon: (gpd-fan) Fix error handling in gpd_fan_probe() devm_request_region() returns a NULL pointer on error, not an ERR_PTR(). Handle it accordingly. Also fix error return from the call to devm_hwmon_device_register_with_info(). Fixes: 0ab88e239439 ("hwmon: add GPD devices sensor driver") Signed-off-by: Harshit Mogalapalli Reviewed-by: Cryolitia PukNgae Link: https://lore.kernel.org/r/20251010204447.94343-1-harshit.m.mogalapalli@oracle.com [groeck: Updated subject to improve readability] Signed-off-by: Guenter Roeck --- drivers/hwmon/gpd-fan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/gpd-fan.c b/drivers/hwmon/gpd-fan.c index eebe39ef9677..321794807e8d 100644 --- a/drivers/hwmon/gpd-fan.c +++ b/drivers/hwmon/gpd-fan.c @@ -621,8 +621,8 @@ static int gpd_fan_probe(struct platform_device *pdev) region = devm_request_region(dev, res->start, resource_size(res), DRIVER_NAME); - if (IS_ERR(region)) - return dev_err_probe(dev, PTR_ERR(region), + if (!region) + return dev_err_probe(dev, -EBUSY, "Failed to request region\n"); hwdev = devm_hwmon_device_register_with_info(dev, @@ -631,7 +631,7 @@ static int gpd_fan_probe(struct platform_device *pdev) &gpd_fan_chip_info, NULL); if (IS_ERR(hwdev)) - return dev_err_probe(dev, PTR_ERR(region), + return dev_err_probe(dev, PTR_ERR(hwdev), "Failed to register hwmon device\n"); return 0; From 57f6f47920ef2f598c46d0a04bd9c8984c98e6df Mon Sep 17 00:00:00 2001 From: Erick Karanja Date: Sun, 12 Oct 2025 21:12:49 +0300 Subject: [PATCH 072/263] hwmon: (pmbus/isl68137) Fix child node reference leak on early return In the case of an early return, the reference to the child node needs to be released. Use for_each_child_of_node_scoped to fix the issue. Fixes: 3996187f80a0e ("hwmon: (pmbus/isl68137) add support for voltage divider on Vout") Signed-off-by: Erick Karanja Link: https://lore.kernel.org/r/20251012181249.359401-1-karanja99erick@gmail.com [groeck: Updated subject/description] Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/isl68137.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/isl68137.c b/drivers/hwmon/pmbus/isl68137.c index 52cf62e45a86..6bba9b50c51b 100644 --- a/drivers/hwmon/pmbus/isl68137.c +++ b/drivers/hwmon/pmbus/isl68137.c @@ -336,10 +336,9 @@ static int isl68137_probe_from_dt(struct device *dev, struct isl68137_data *data) { const struct device_node *np = dev->of_node; - struct device_node *child; int err; - for_each_child_of_node(np, child) { + for_each_child_of_node_scoped(np, child) { if (strcmp(child->name, "channel")) continue; From a09a5aa8bf258ddc99a22c30f17fe304b96b5350 Mon Sep 17 00:00:00 2001 From: Li Qiang Date: Fri, 17 Oct 2025 14:34:14 +0800 Subject: [PATCH 073/263] hwmon: (cgbc-hwmon) Add missing NULL check after devm_kzalloc() The driver allocates memory for sensor data using devm_kzalloc(), but did not check if the allocation succeeded. In case of memory allocation failure, dereferencing the NULL pointer would lead to a kernel crash. Add a NULL pointer check and return -ENOMEM to handle allocation failure properly. Signed-off-by: Li Qiang Fixes: 08ebc9def79fc ("hwmon: Add Congatec Board Controller monitoring driver") Reviewed-by: Thomas Richard Link: https://lore.kernel.org/r/20251017063414.1557447-1-liqiang01@kylinos.cn Signed-off-by: Guenter Roeck --- drivers/hwmon/cgbc-hwmon.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hwmon/cgbc-hwmon.c b/drivers/hwmon/cgbc-hwmon.c index 772f44d56ccf..3aff4e092132 100644 --- a/drivers/hwmon/cgbc-hwmon.c +++ b/drivers/hwmon/cgbc-hwmon.c @@ -107,6 +107,9 @@ static int cgbc_hwmon_probe_sensors(struct device *dev, struct cgbc_hwmon_data * nb_sensors = data[0]; hwmon->sensors = devm_kzalloc(dev, sizeof(*hwmon->sensors) * nb_sensors, GFP_KERNEL); + if (!hwmon->sensors) + return -ENOMEM; + sensor = hwmon->sensors; for (i = 0; i < nb_sensors; i++) { From 8dcc66ad379ec0642fb281c45ccfd7d2d366e53f Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 18 Oct 2025 06:04:57 -0700 Subject: [PATCH 074/263] hwmon: (sht3x) Fix error handling Handling of errors when reading status, temperature, and humidity returns the error number as negative attribute value. Fix it up by returning the error as return value. Fixes: a0ac418c6007c ("hwmon: (sht3x) convert some of sysfs interface to hwmon") Cc: JuenKit Yip Signed-off-by: Guenter Roeck --- drivers/hwmon/sht3x.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/sht3x.c b/drivers/hwmon/sht3x.c index 557ad3e7752a..f36c0229328f 100644 --- a/drivers/hwmon/sht3x.c +++ b/drivers/hwmon/sht3x.c @@ -291,24 +291,26 @@ static struct sht3x_data *sht3x_update_client(struct device *dev) return data; } -static int temp1_input_read(struct device *dev) +static int temp1_input_read(struct device *dev, long *temp) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->temperature; + *temp = data->temperature; + return 0; } -static int humidity1_input_read(struct device *dev) +static int humidity1_input_read(struct device *dev, long *humidity) { struct sht3x_data *data = sht3x_update_client(dev); if (IS_ERR(data)) return PTR_ERR(data); - return data->humidity; + *humidity = data->humidity; + return 0; } /* @@ -706,6 +708,7 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { enum sht3x_limits index; + int ret; switch (type) { case hwmon_chip: @@ -720,10 +723,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_temp: switch (attr) { case hwmon_temp_input: - *val = temp1_input_read(dev); - break; + return temp1_input_read(dev, val); case hwmon_temp_alarm: - *val = temp1_alarm_read(dev); + ret = temp1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_temp_max: index = limit_max; @@ -748,10 +753,12 @@ static int sht3x_read(struct device *dev, enum hwmon_sensor_types type, case hwmon_humidity: switch (attr) { case hwmon_humidity_input: - *val = humidity1_input_read(dev); - break; + return humidity1_input_read(dev, val); case hwmon_humidity_alarm: - *val = humidity1_alarm_read(dev); + ret = humidity1_alarm_read(dev); + if (ret < 0) + return ret; + *val = ret; break; case hwmon_humidity_max: index = limit_max; From e607ef686ab95fbcb0dfd16f49aea7918be626e1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Thu, 16 Oct 2025 12:54:21 +0200 Subject: [PATCH 075/263] smb: client: allocate enough space for MR WRs and ib_drain_qp() The IB_WR_REG_MR and IB_WR_LOCAL_INV operations for smbdirect_mr_io structures should never fail because the submission or completion queues are too small. So we allocate more send_wr depending on the (local) max number of MRs. While there also add additional space for ib_drain_qp(). This should make sure ib_post_send() will never fail because the submission queue is full. Fixes: f198186aa9bb ("CIFS: SMBD: Establish SMB Direct connection") Fixes: cc55f65dd352 ("smb: client: make use of common smbdirect_socket_parameters") Cc: stable@vger.kernel.org Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 49e2df3ad1f0..b1218ea4aa8b 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -1767,6 +1767,7 @@ static struct smbd_connection *_smbd_get_connection( struct smbdirect_socket *sc; struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; @@ -1838,6 +1839,25 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } + sp->responder_resources = + min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); + log_rdma_mr(INFO, "responder_resources=%d\n", + sp->responder_resources); + + /* + * We use allocate sp->responder_resources * 2 MRs + * and each MR needs WRs for REG and INV, so + * we use '* 4'. + * + * +1 for ib_drain_qp() + */ + memset(&qp_cap, 0, sizeof(qp_cap)); + qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { rc = PTR_ERR(sc->ib.pd); @@ -1848,7 +1868,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target, IB_POLL_SOFTIRQ); + qp_cap.max_send_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; goto alloc_cq_failed; @@ -1856,7 +1876,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, IB_POLL_SOFTIRQ); + qp_cap.max_recv_wr, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; goto alloc_cq_failed; @@ -1865,11 +1885,7 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = sc; - qp_attr.cap.max_send_wr = sp->send_credit_target; - qp_attr.cap.max_recv_wr = sp->recv_credit_max; - qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - qp_attr.cap.max_inline_data = 0; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -1883,12 +1899,6 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.qp = sc->rdma.cm_id->qp; - sp->responder_resources = - min_t(u8, sp->responder_resources, - sc->ib.dev->attrs.max_qp_rd_atom); - log_rdma_mr(INFO, "responder_resources=%d\n", - sp->responder_resources); - memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = sp->initiator_depth; conn_param.responder_resources = sp->responder_resources; From 103541e6a5854b08a25e4caa61e990af1009a52e Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 2 Oct 2025 08:22:35 +0000 Subject: [PATCH 076/263] rv: Fully convert enabled_monitors to use list_head as iterator The callbacks in enabled_monitors_seq_ops are inconsistent. Some treat the iterator as struct rv_monitor *, while others treat the iterator as struct list_head *. This causes a wrong type cast and crashes the system as reported by Nathan. Convert everything to use struct list_head * as iterator. This also makes enabled_monitors consistent with available_monitors. Fixes: de090d1ccae1 ("rv: Fix wrong type cast in enabled_monitors_next()") Reported-by: Nathan Chancellor Closes: https://lore.kernel.org/linux-trace-kernel/20250923002004.GA2836051@ax162/ Signed-off-by: Nam Cao Cc: stable@vger.kernel.org Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/20251002082235.973099-1-namcao@linutronix.de Signed-off-by: Gabriele Monaco --- kernel/trace/rv/rv.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 48338520376f..43e9ea473cda 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -501,7 +501,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) list_for_each_entry_continue(mon, &rv_monitors_list, list) { if (mon->enabled) - return mon; + return &mon->list; } return NULL; @@ -509,7 +509,7 @@ static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) { - struct rv_monitor *mon; + struct list_head *head; loff_t l; mutex_lock(&rv_interface_lock); @@ -517,15 +517,15 @@ static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) if (list_empty(&rv_monitors_list)) return NULL; - mon = list_entry(&rv_monitors_list, struct rv_monitor, list); + head = &rv_monitors_list; for (l = 0; l <= *pos; ) { - mon = enabled_monitors_next(m, mon, &l); - if (!mon) + head = enabled_monitors_next(m, head, &l); + if (!head) break; } - return mon; + return head; } /* From 3d62f95bd8450cebb4a4741bf83949cd54edd4a3 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Thu, 2 Oct 2025 08:23:17 +0000 Subject: [PATCH 077/263] rv: Make rtapp/pagefault monitor depends on CONFIG_MMU There is no page fault without MMU. Compiling the rtapp/pagefault monitor without CONFIG_MMU fails as page fault tracepoints' definitions are not available. Make rtapp/pagefault monitor depends on CONFIG_MMU. Fixes: 9162620eb604 ("rv: Add rtapp_pagefault monitor") Signed-off-by: Nam Cao Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202509260455.6Z9Vkty4-lkp@intel.com/ Cc: stable@vger.kernel.org Reviewed-by: Gabriele Monaco Link: https://lore.kernel.org/r/20251002082317.973839-1-namcao@linutronix.de Signed-off-by: Gabriele Monaco --- kernel/trace/rv/monitors/pagefault/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/rv/monitors/pagefault/Kconfig b/kernel/trace/rv/monitors/pagefault/Kconfig index 5e16625f1653..0e013f00c33b 100644 --- a/kernel/trace/rv/monitors/pagefault/Kconfig +++ b/kernel/trace/rv/monitors/pagefault/Kconfig @@ -5,6 +5,7 @@ config RV_MON_PAGEFAULT select RV_LTL_MONITOR depends on RV_MON_RTAPP depends on X86 || RISCV + depends on MMU default y select LTL_MON_EVENTS_ID bool "pagefault monitor" From 607844761454e3c17e928002e126ccf21c83f6aa Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 24 Sep 2025 18:30:14 +0530 Subject: [PATCH 078/263] wifi: mac80211: reset FILS discovery and unsol probe resp intervals When ieee80211_stop_ap() deletes the FILS discovery and unsolicited broadcast probe response templates, the associated interval values are not reset. This can lead to drivers subsequently operating with the non-zero values, leading to unexpected behavior. Trigger repeated retrieval attempts of the FILS discovery template in ath12k, resulting in excessive log messages such as: mac vdev 0 failed to retrieve FILS discovery template mac vdev 4 failed to retrieve FILS discovery template Fix this by resetting the intervals in ieee80211_stop_ap() to ensure proper cleanup of FILS discovery and unsolicited broadcast probe response templates. Fixes: 295b02c4be74 ("mac80211: Add FILS discovery support") Fixes: 632189a0180f ("mac80211: Unsolicited broadcast probe response support") Signed-off-by: Aloka Dixit Signed-off-by: Aaradhana Sahu Link: https://patch.msgid.link/20250924130014.2575533-1-aaradhana.sahu@oss.qualcomm.com Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d9aca1c3c097..c52b0456039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1876,6 +1876,9 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, link_conf->nontransmitted = false; link_conf->ema_ap = false; link_conf->bssid_indicator = 0; + link_conf->fils_discovery.min_interval = 0; + link_conf->fils_discovery.max_interval = 0; + link_conf->unsol_bcast_probe_resp_interval = 0; __sta_info_flush(sdata, true, link_id, NULL); From a2a69add80411dd295c9088c1bcf925b1f4e53d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 3 Oct 2025 14:51:26 +0200 Subject: [PATCH 079/263] bcma: don't register devices disabled in OF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some bus devices can be marked as disabled for specific SoCs or models. Those should not be registered to avoid probing them. Signed-off-by: Rafał Miłecki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20251003125126.27950-1-zajec5@gmail.com Signed-off-by: Johannes Berg --- drivers/bcma/main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 6ecfc821cf83..72f045e6ed51 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -294,6 +294,8 @@ static int bcma_register_devices(struct bcma_bus *bus) int err; list_for_each_entry(core, &bus->cores, list) { + struct device_node *np; + /* We support that core ourselves */ switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: @@ -311,6 +313,10 @@ static int bcma_register_devices(struct bcma_bus *bus) if (bcma_is_core_needed_early(core->id.id)) continue; + np = core->dev.of_node; + if (np && !of_device_is_available(np)) + continue; + /* Only first GMAC core on BCM4706 is connected and working */ if (core->id.id == BCMA_CORE_4706_MAC_GBIT && core->core_unit > 0) From 1e1801cab6c7f302baec2a0fe3afe25458d0be7e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 11 Oct 2025 00:57:35 +0100 Subject: [PATCH 080/263] MAINTAINERS: wcn36xx: Add linux-wireless list The wcn36xx is a wireless device but doesn't have the wireless list in its MAINTAINERS entry. Add it. Signed-off-by: Dr. David Alan Gilbert Acked-by: Jeff Johnson Link: https://patch.msgid.link/20251010235735.350638-1-linux@treblig.org Signed-off-by: Johannes Berg --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 545a4776795e..91bf59792060 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21317,6 +21317,7 @@ F: drivers/media/platform/qcom/venus/ QUALCOMM WCN36XX WIRELESS DRIVER M: Loic Poulain L: wcn36xx@lists.infradead.org +L: linux-wireless@vger.kernel.org S: Supported W: https://wireless.wiki.kernel.org/en/users/Drivers/wcn36xx F: drivers/net/wireless/ath/wcn36xx/ From 3776c685ebe5f43e9060af06872661de55e80b9a Mon Sep 17 00:00:00 2001 From: Gokul Sivakumar Date: Mon, 13 Oct 2025 15:58:19 +0530 Subject: [PATCH 081/263] wifi: brcmfmac: fix crash while sending Action Frames in standalone AP Mode Currently, whenever there is a need to transmit an Action frame, the brcmfmac driver always uses the P2P vif to send the "actframe" IOVAR to firmware. The P2P interfaces were available when wpa_supplicant is managing the wlan interface. However, the P2P interfaces are not created/initialized when only hostapd is managing the wlan interface. And if hostapd receives an ANQP Query REQ Action frame even from an un-associated STA, the brcmfmac driver tries to use an uninitialized P2P vif pointer for sending the IOVAR to firmware. This NULL pointer dereferencing triggers a driver crash. [ 1417.074538] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 [...] [ 1417.075188] Hardware name: Raspberry Pi 4 Model B Rev 1.5 (DT) [...] [ 1417.075653] Call trace: [ 1417.075662] brcmf_p2p_send_action_frame+0x23c/0xc58 [brcmfmac] [ 1417.075738] brcmf_cfg80211_mgmt_tx+0x304/0x5c0 [brcmfmac] [ 1417.075810] cfg80211_mlme_mgmt_tx+0x1b0/0x428 [cfg80211] [ 1417.076067] nl80211_tx_mgmt+0x238/0x388 [cfg80211] [ 1417.076281] genl_family_rcv_msg_doit+0xe0/0x158 [ 1417.076302] genl_rcv_msg+0x220/0x2a0 [ 1417.076317] netlink_rcv_skb+0x68/0x140 [ 1417.076330] genl_rcv+0x40/0x60 [ 1417.076343] netlink_unicast+0x330/0x3b8 [ 1417.076357] netlink_sendmsg+0x19c/0x3f8 [ 1417.076370] __sock_sendmsg+0x64/0xc0 [ 1417.076391] ____sys_sendmsg+0x268/0x2a0 [ 1417.076408] ___sys_sendmsg+0xb8/0x118 [ 1417.076427] __sys_sendmsg+0x90/0xf8 [ 1417.076445] __arm64_sys_sendmsg+0x2c/0x40 [ 1417.076465] invoke_syscall+0x50/0x120 [ 1417.076486] el0_svc_common.constprop.0+0x48/0xf0 [ 1417.076506] do_el0_svc+0x24/0x38 [ 1417.076525] el0_svc+0x30/0x100 [ 1417.076548] el0t_64_sync_handler+0x100/0x130 [ 1417.076569] el0t_64_sync+0x190/0x198 [ 1417.076589] Code: f9401e80 aa1603e2 f9403be1 5280e483 (f9400000) Fix this, by always using the vif corresponding to the wdev on which the Action frame Transmission request was initiated by the userspace. This way, even if P2P vif is not available, the IOVAR is sent to firmware on AP vif and the ANQP Query RESP Action frame is transmitted without crashing the driver. Move init_completion() for "send_af_done" from brcmf_p2p_create_p2pdev() to brcmf_p2p_attach(). Because the former function would not get executed when only hostapd is managing wlan interface, and it is not safe to do reinit_completion() later in brcmf_p2p_tx_action_frame(), without any prior init_completion(). And in the brcmf_p2p_tx_action_frame() function, the condition check for P2P Presence response frame is not needed, since the wpa_supplicant is properly sending the P2P Presense Response frame on the P2P-GO vif instead of the P2P-Device vif. Cc: stable@vger.kernel.org Fixes: 18e2f61db3b7 ("brcmfmac: P2P action frame tx") Signed-off-by: Gokul Sivakumar Acked-by: Arend van Spriel Link: https://patch.msgid.link/20251013102819.9727-1-gokulkumar.sivakumar@infineon.com [Cc stable] Signed-off-by: Johannes Berg --- .../broadcom/brcm80211/brcmfmac/cfg80211.c | 3 +- .../broadcom/brcm80211/brcmfmac/p2p.c | 28 +++++++------------ .../broadcom/brcm80211/brcmfmac/p2p.h | 3 +- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 8afaffe31031..bb96b87b2a6e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -5627,8 +5627,7 @@ brcmf_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, *cookie, le16_to_cpu(action_frame->len), le32_to_cpu(af_params->channel)); - ack = brcmf_p2p_send_action_frame(cfg, cfg_to_ndev(cfg), - af_params); + ack = brcmf_p2p_send_action_frame(vif->ifp, af_params); cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, ack, GFP_KERNEL); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 0dc9d28cd77b..e1752a513c73 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -1529,6 +1529,7 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, /** * brcmf_p2p_tx_action_frame() - send action frame over fil. * + * @ifp: interface to transmit on. * @p2p: p2p info struct for vif. * @af_params: action frame data/info. * @@ -1538,12 +1539,11 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, * The WLC_E_ACTION_FRAME_COMPLETE event will be received when the action * frame is transmitted. */ -static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, +static s32 brcmf_p2p_tx_action_frame(struct brcmf_if *ifp, + struct brcmf_p2p_info *p2p, struct brcmf_fil_af_params_le *af_params) { struct brcmf_pub *drvr = p2p->cfg->pub; - struct brcmf_cfg80211_vif *vif; - struct brcmf_p2p_action_frame *p2p_af; s32 err = 0; brcmf_dbg(TRACE, "Enter\n"); @@ -1552,14 +1552,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); - /* check if it is a p2p_presence response */ - p2p_af = (struct brcmf_p2p_action_frame *)af_params->action_frame.data; - if (p2p_af->subtype == P2P_AF_PRESENCE_RSP) - vif = p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif; - else - vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif; - - err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe", af_params, + err = brcmf_fil_bsscfg_data_set(ifp, "actframe", af_params, sizeof(*af_params)); if (err) { bphy_err(drvr, " sending action frame has failed\n"); @@ -1711,16 +1704,14 @@ static bool brcmf_p2p_check_dwell_overflow(u32 requested_dwell, /** * brcmf_p2p_send_action_frame() - send action frame . * - * @cfg: driver private data for cfg80211 interface. - * @ndev: net device to transmit on. + * @ifp: interface to transmit on. * @af_params: configuration data for action frame. */ -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params) { + struct brcmf_cfg80211_info *cfg = ifp->drvr->config; struct brcmf_p2p_info *p2p = &cfg->p2p; - struct brcmf_if *ifp = netdev_priv(ndev); struct brcmf_fil_action_frame_le *action_frame; struct brcmf_config_af_params config_af_params; struct afx_hdl *afx_hdl = &p2p->afx_hdl; @@ -1857,7 +1848,7 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, if (af_params->channel) msleep(P2P_AF_RETRY_DELAY_TIME); - ack = !brcmf_p2p_tx_action_frame(p2p, af_params); + ack = !brcmf_p2p_tx_action_frame(ifp, p2p, af_params); tx_retry++; dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell, dwell_jiffies); @@ -2217,7 +2208,6 @@ static struct wireless_dev *brcmf_p2p_create_p2pdev(struct brcmf_p2p_info *p2p, WARN_ON(p2p_ifp->bsscfgidx != bsscfgidx); - init_completion(&p2p->send_af_done); INIT_WORK(&p2p->afx_hdl.afx_work, brcmf_p2p_afx_handler); init_completion(&p2p->afx_hdl.act_frm_scan); init_completion(&p2p->wait_next_af); @@ -2513,6 +2503,8 @@ s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced) pri_ifp = brcmf_get_ifp(cfg->pub, 0); p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif = pri_ifp->vif; + init_completion(&p2p->send_af_done); + if (p2pdev_forced) { err_ptr = brcmf_p2p_create_p2pdev(p2p, NULL, NULL); if (IS_ERR(err_ptr)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index d2ecee565bf2..d3137ebd7158 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -168,8 +168,7 @@ int brcmf_p2p_notify_action_frame_rx(struct brcmf_if *ifp, int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, const struct brcmf_event_msg *e, void *data); -bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg, - struct net_device *ndev, +bool brcmf_p2p_send_action_frame(struct brcmf_if *ifp, struct brcmf_fil_af_params_le *af_params); bool brcmf_p2p_scan_finding_common_channel(struct brcmf_cfg80211_info *cfg, struct brcmf_bss_info_le *bi); From ed6a47346ec69e7f1659e0a1a3558293f60d5dd7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Oct 2025 11:54:27 +0300 Subject: [PATCH 082/263] wifi: mac80211: fix key tailroom accounting leak For keys added by ieee80211_gtk_rekey_add(), we assume that they're already present in the hardware and set the flag KEY_FLAG_UPLOADED_TO_HARDWARE. However, setting this flag needs to be paired with decrementing the tailroom needed, which was missed. Fixes: f52a0b408ed1 ("wifi: mac80211: mark keys as uploaded when added by the driver") Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20251019115358.c88eafb4083e.I69e9d4d78a756a133668c55b5570cf15a4b0e6a4@changeid Signed-off-by: Johannes Berg --- net/mac80211/key.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b14e9cd9713f..d5da7ccea66e 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -508,11 +508,16 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - if (!new->local->wowlan) + if (!new->local->wowlan) { ret = ieee80211_key_enable_hw_accel(new); - else if (link_id < 0 || !sdata->vif.active_links || - BIT(link_id) & sdata->vif.active_links) + } else if (link_id < 0 || !sdata->vif.active_links || + BIT(link_id) & sdata->vif.active_links) { new->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + if (!(new->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | + IEEE80211_KEY_FLAG_PUT_MIC_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) + decrease_tailroom_need_count(sdata, 1); + } } if (ret) From 249e1443e3d57e059925bdb698f53e4d008fc106 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Mon, 20 Oct 2025 10:57:45 +0300 Subject: [PATCH 083/263] wifi: nl80211: call kfree without a NULL check Coverity is unhappy because we may leak old_radio_rts_threshold. Since this pointer is only valid in the context of the function and kfree is NULL pointer safe, don't check and just call kfree. Note that somehow, we were checking old_rts_threshold to free old_radio_rts_threshold which is a bit odd. Fixes: 264637941cf4 ("wifi: cfg80211: Add Support to Set RTS Threshold for each Radio") Reviewed-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Link: https://patch.msgid.link/20251020075745.44168-1-emmanuel.grumbach@intel.com Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 346dfd2bd987..03d07b54359a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4136,8 +4136,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_quantum = old_txq_quantum; } - if (old_rts_threshold) - kfree(old_radio_rts_threshold); + kfree(old_radio_rts_threshold); return result; } From 71c07570b918f000de5d0f7f1bf17a2887e303b5 Mon Sep 17 00:00:00 2001 From: Renjun Wang Date: Sun, 19 Oct 2025 18:44:38 +0800 Subject: [PATCH 084/263] USB: serial: option: add UNISOC UIS7720 Add support for UNISOC (Spreadtrum) UIS7720 (A7720) module. T: Bus=05 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1782 ProdID=4064 Rev=04.04 S: Manufacturer=Unisoc-phone S: Product=Unisoc-phone S: SerialNumber=0123456789ABCDEF C: #Ifs= 9 Cfg#= 1 Atr=c0 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 1 Cls=e0(wlcon) Sub=01 Prot=03 Driver=rndis_host E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 7 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0&1: RNDIS, 2: LOG, 3: DIAG, 4&5: AT Ports, 6&7: AT2 Ports, 8: ADB Signed-off-by: Renjun Wang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 62e984d20e59..ed1328648a73 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -617,6 +617,7 @@ static void option_instat_callback(struct urb *urb); #define UNISOC_VENDOR_ID 0x1782 /* TOZED LT70-C based on UNISOC SL8563 uses UNISOC's vendor ID */ #define TOZED_PRODUCT_LT70C 0x4055 +#define UNISOC_PRODUCT_UIS7720 0x4064 /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 @@ -2466,6 +2467,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9291, 0xff, 0xff, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, UNISOC_PRODUCT_UIS7720, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, From e46ee2f07e5848d7ec7aec38b72476dc7941b048 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:13 +0200 Subject: [PATCH 085/263] spi: intel: Add support for 128M component density With the recent hardware the flash component density can be increased to 128M. Update the driver to support this. While there log a warning if we encounter an unsupported value in this field. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-2-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/spi/spi-intel.c b/drivers/spi/spi-intel.c index 13bbb2133507..1775ad39e633 100644 --- a/drivers/spi/spi-intel.c +++ b/drivers/spi/spi-intel.c @@ -132,6 +132,7 @@ #define FLCOMP_C0DEN_16M 0x05 #define FLCOMP_C0DEN_32M 0x06 #define FLCOMP_C0DEN_64M 0x07 +#define FLCOMP_C0DEN_128M 0x08 #define INTEL_SPI_TIMEOUT 5000 /* ms */ #define INTEL_SPI_FIFO_SZ 64 @@ -1347,7 +1348,12 @@ static int intel_spi_read_desc(struct intel_spi *ispi) case FLCOMP_C0DEN_64M: ispi->chip0_size = SZ_64M; break; + case FLCOMP_C0DEN_128M: + ispi->chip0_size = SZ_128M; + break; default: + dev_warn(ispi->dev, "unsupported C0DEN: %#lx\n", + flcomp & FLCOMP_C0DEN_MASK); return -EINVAL; } From bc25c6e0a4880b5ad70c31fe1466f30c9e4c8f52 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:14 +0200 Subject: [PATCH 086/263] spi: intel-pci: Add support for Arrow Lake-H SPI serial flash Add Intel Arrow Lake-H PCI ID to the driver list of supported devices. This is the same controller found in previous generations. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-3-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 4b63cb98df9c..49b4d3061197 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -79,6 +79,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x5794), (unsigned long)&cnl_info }, + { PCI_VDEVICE(INTEL, 0x7723), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7a24), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x7e23), (unsigned long)&cnl_info }, From f7e37affbc9085f2b77ccb6596521a44eabf7505 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 20 Oct 2025 16:54:15 +0200 Subject: [PATCH 087/263] spi: intel-pci: Add support for Intel Wildcat Lake SPI serial flash Add Intel Wildcat Lake SPI serial flash PCI ID to the list of supported devices. Signed-off-by: Mika Westerberg Link: https://patch.msgid.link/20251020145415.3377022-4-mika.westerberg@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-intel-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-intel-pci.c b/drivers/spi/spi-intel-pci.c index 49b4d3061197..7765fb27c37c 100644 --- a/drivers/spi/spi-intel-pci.c +++ b/drivers/spi/spi-intel-pci.c @@ -75,6 +75,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x38a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x4d23), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x51a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x54a4), (unsigned long)&cnl_info }, From 4ec703ec0c384a2199808c4eb2e9037236285a8d Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Sat, 18 Oct 2025 12:32:54 -0700 Subject: [PATCH 088/263] io_uring: fix incorrect unlikely() usage in io_waitid_prep() The negation operator is incorrectly placed outside the unlikely() macro: if (!unlikely(iwa)) This inverts the compiler branch prediction hint, marking the NULL case as likely instead of unlikely. The intent is to indicate that allocation failures are rare, consistent with common kernel patterns. Moving the negation inside unlikely(): if (unlikely(!iwa)) Fixes: 2b4fc4cd43f2 ("io_uring/waitid: setup async data in the prep handler") Signed-off-by: Alok Tiwari Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Caleb Sander Mateos Signed-off-by: Jens Axboe --- io_uring/waitid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/waitid.c b/io_uring/waitid.c index f25110fb1b12..53532ae6256c 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EINVAL; iwa = io_uring_alloc_async_data(NULL, req); - if (!unlikely(iwa)) + if (unlikely(!iwa)) return -ENOMEM; iwa->req = req; From 81ccca31214e11ea2b537fd35d4f66d7cf46268e Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Fri, 10 Oct 2025 10:09:00 +0200 Subject: [PATCH 089/263] nbd: override creds to kernel when calling sock_{send,recv}msg() sock_{send,recv}msg() internally calls security_socket_{send,recv}msg(), which does security checks (e.g. SELinux) for socket access against the current task. However, _sock_xmit() in drivers/block/nbd.c may be called indirectly from a userspace syscall, where the NBD socket access would be incorrectly checked against the calling userspace task (which simply tries to read/write a file that happens to reside on an NBD device). To fix this, temporarily override creds to kernel ones before calling the sock_*() functions. This allows the security modules to recognize this as internal access by the kernel, which will normally be allowed. A way to trigger the issue is to do the following (on a system with SELinux set to enforcing): ### Create nbd device: truncate -s 256M /tmp/testfile nbd-server localhost:10809 /tmp/testfile ### Connect to the nbd server: nbd-client localhost ### Create mdraid array mdadm --create -l 1 -n 2 /dev/md/testarray /dev/nbd0 missing After these steps, assuming the SELinux policy doesn't allow the unexpected access pattern, errors will be visible on the kernel console: [ 142.204243] nbd0: detected capacity change from 0 to 524288 [ 165.189967] md: async del_gendisk mode will be removed in future, please upgrade to mdadm-4.5+ [ 165.252299] md/raid1:md127: active with 1 out of 2 mirrors [ 165.252725] md127: detected capacity change from 0 to 522240 [ 165.255434] block nbd0: Send control failed (result -13) [ 165.255718] block nbd0: Request send failed, requeueing [ 165.256006] block nbd0: Dead connection, failed to find a fallback [ 165.256041] block nbd0: Receive control failed (result -32) [ 165.256423] block nbd0: shutting down sockets [ 165.257196] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.257736] Buffer I/O error on dev md127, logical block 0, async page read [ 165.258263] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.259376] Buffer I/O error on dev md127, logical block 0, async page read [ 165.259920] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.260628] Buffer I/O error on dev md127, logical block 0, async page read [ 165.261661] ldm_validate_partition_table(): Disk read failed. [ 165.262108] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.262769] Buffer I/O error on dev md127, logical block 0, async page read [ 165.263697] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.264412] Buffer I/O error on dev md127, logical block 0, async page read [ 165.265412] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.265872] Buffer I/O error on dev md127, logical block 0, async page read [ 165.266378] I/O error, dev nbd0, sector 2048 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.267168] Buffer I/O error on dev md127, logical block 0, async page read [ 165.267564] md127: unable to read partition table [ 165.269581] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.269960] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.270316] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.270913] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.271253] I/O error, dev nbd0, sector 0 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 2 [ 165.271809] Buffer I/O error on dev nbd0, logical block 0, async page read [ 165.272074] ldm_validate_partition_table(): Disk read failed. [ 165.272360] nbd0: unable to read partition table [ 165.289004] ldm_validate_partition_table(): Disk read failed. [ 165.289614] nbd0: unable to read partition table The corresponding SELinux denial on Fedora/RHEL will look like this (assuming it's not silenced): type=AVC msg=audit(1758104872.510:116): avc: denied { write } for pid=1908 comm="mdadm" laddr=::1 lport=32772 faddr=::1 fport=10809 scontext=system_u:system_r:mdadm_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=tcp_socket permissive=0 The respective backtrace looks like this: @security[mdadm, -13, handshake_exit+221615650 handshake_exit+221615650 handshake_exit+221616465 security_socket_sendmsg+5 sock_sendmsg+106 handshake_exit+221616150 sock_sendmsg+5 __sock_xmit+162 nbd_send_cmd+597 nbd_handle_cmd+377 nbd_queue_rq+63 blk_mq_dispatch_rq_list+653 __blk_mq_do_dispatch_sched+184 __blk_mq_sched_dispatch_requests+333 blk_mq_sched_dispatch_requests+38 blk_mq_run_hw_queue+239 blk_mq_dispatch_plug_list+382 blk_mq_flush_plug_list.part.0+55 __blk_flush_plug+241 __submit_bio+353 submit_bio_noacct_nocheck+364 submit_bio_wait+84 __blkdev_direct_IO_simple+232 blkdev_read_iter+162 vfs_read+591 ksys_read+95 do_syscall_64+92 entry_SYSCALL_64_after_hwframe+120 ]: 1 The issue has started to appear since commit 060406c61c7c ("block: add plug while submitting IO"). Cc: Ming Lei Link: https://bugzilla.redhat.com/show_bug.cgi?id=2348878 Fixes: 060406c61c7c ("block: add plug while submitting IO") Signed-off-by: Ondrej Mosnacek Acked-by: Paul Moore Acked-by: Stephen Smalley Reviewed-by: Ming Lei Tested-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1188f32a5e5e..a853c65ac65d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -52,6 +52,7 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static struct workqueue_struct *nbd_del_wq; +static struct cred *nbd_cred; static int nbd_total_devices = 0; struct nbd_sock { @@ -554,6 +555,7 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, int result; struct msghdr msg = {} ; unsigned int noreclaim_flag; + const struct cred *old_cred; if (unlikely(!sock)) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -562,6 +564,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, return -EINVAL; } + old_cred = override_creds(nbd_cred); + msg.msg_iter = *iter; noreclaim_flag = memalloc_noreclaim_save(); @@ -586,6 +590,8 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, memalloc_noreclaim_restore(noreclaim_flag); + revert_creds(old_cred); + return result; } @@ -2677,7 +2683,15 @@ static int __init nbd_init(void) return -ENOMEM; } + nbd_cred = prepare_kernel_cred(&init_task); + if (!nbd_cred) { + destroy_workqueue(nbd_del_wq); + unregister_blkdev(NBD_MAJOR, "nbd"); + return -ENOMEM; + } + if (genl_register_family(&nbd_genl_family)) { + put_cred(nbd_cred); destroy_workqueue(nbd_del_wq); unregister_blkdev(NBD_MAJOR, "nbd"); return -EINVAL; @@ -2732,6 +2746,7 @@ static void __exit nbd_cleanup(void) /* Also wait for nbd_dev_remove_work() completes */ destroy_workqueue(nbd_del_wq); + put_cred(nbd_cred); idr_destroy(&nbd_index_idr); unregister_blkdev(NBD_MAJOR, "nbd"); } From a1978b692a3953241842a89eaa0026158f306cf1 Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Fri, 17 Oct 2025 17:10:53 +0530 Subject: [PATCH 090/263] PCI: dwc: Use custom pci_ops for root bus DBI vs ECAM config access When the vendor configuration space is 256MB aligned, the DesignWare PCIe host driver enables ECAM access and sets the DBI base to the start of the config space. This causes vendor drivers to incorrectly program iATU regions, as they rely on the DBI address for internal accesses. To fix this, avoid overwriting the DBI base when ECAM is enabled. Instead, introduce a custom pci_ops that accesses the DBI region directly for the root bus and uses ECAM for other buses. Fixes: f6fd357f7afb ("PCI: dwc: Prepare the driver for enabling ECAM mechanism using iATU 'CFG Shift Feature'") Reported-by: Ron Economos Closes: https://lore.kernel.org/all/eac81c57-1164-4d74-a1b4-6f353c577731@w6rz.net/ Suggested-by: Manivannan Sadhasivam Signed-off-by: Krishna Chaitanya Chundru [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Tested-by: Ron Economos Link: https://patch.msgid.link/20251017-ecam_fix-v1-1-f6faa3d0edf3@oss.qualcomm.com --- .../pci/controller/dwc/pcie-designware-host.c | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 20c9333bcb1c..e92513c5bda5 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -23,6 +23,7 @@ #include "pcie-designware.h" static struct pci_ops dw_pcie_ops; +static struct pci_ops dw_pcie_ecam_ops; static struct pci_ops dw_child_pcie_ops; #define DW_PCIE_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | \ @@ -471,9 +472,6 @@ static int dw_pcie_create_ecam_window(struct dw_pcie_rp *pp, struct resource *re if (IS_ERR(pp->cfg)) return PTR_ERR(pp->cfg); - pci->dbi_base = pp->cfg->win; - pci->dbi_phys_addr = res->start; - return 0; } @@ -529,7 +527,7 @@ static int dw_pcie_host_get_resources(struct dw_pcie_rp *pp) if (ret) return ret; - pp->bridge->ops = (struct pci_ops *)&pci_generic_ecam_ops.pci_ops; + pp->bridge->ops = &dw_pcie_ecam_ops; pp->bridge->sysdata = pp->cfg; pp->cfg->priv = pp; } else { @@ -842,12 +840,34 @@ void __iomem *dw_pcie_own_conf_map_bus(struct pci_bus *bus, unsigned int devfn, } EXPORT_SYMBOL_GPL(dw_pcie_own_conf_map_bus); +static void __iomem *dw_pcie_ecam_conf_map_bus(struct pci_bus *bus, unsigned int devfn, int where) +{ + struct pci_config_window *cfg = bus->sysdata; + struct dw_pcie_rp *pp = cfg->priv; + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + unsigned int busn = bus->number; + + if (busn > 0) + return pci_ecam_map_bus(bus, devfn, where); + + if (PCI_SLOT(devfn) > 0) + return NULL; + + return pci->dbi_base + where; +} + static struct pci_ops dw_pcie_ops = { .map_bus = dw_pcie_own_conf_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, }; +static struct pci_ops dw_pcie_ecam_ops = { + .map_bus = dw_pcie_ecam_conf_map_bus, + .read = pci_generic_config_read, + .write = pci_generic_config_write, +}; + static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); From fc2bc2623e3a099165b02d13567d21fabb5ea54d Mon Sep 17 00:00:00 2001 From: Krishna Chaitanya Chundru Date: Fri, 17 Oct 2025 17:10:54 +0530 Subject: [PATCH 091/263] Revert "PCI: qcom: Prepare for the DWC ECAM enablement" This reverts commit 4660e50cf81800f82eeecf743ad1e3e97ab72190. Commit f6fd357f7afb ("PCI: dwc: Prepare the driver for enabling ECAM mechanism using iATU 'CFG Shift Feature'") enabled ECAM access by using the config space start as DBI address. However, this approach breaks vendor drivers that rely on the DBI address for internal accesses, especially when the vendor config space is 256MB aligned. To resolve this, avoid using the DBI as the start of config space and instead introduce a custom ECAM PCI ops implementation. Revert the qcom specific ECAM preparation logic in 4660e50cf818 ("PCI: qcom: Prepare for the DWC ECAM enablement") since it's no longer necessary. Signed-off-by: Krishna Chaitanya Chundru [bhelgaas: commit log] Signed-off-by: Bjorn Helgaas Link: https://patch.msgid.link/20251017-ecam_fix-v1-2-f6faa3d0edf3@oss.qualcomm.com --- drivers/pci/controller/dwc/pcie-qcom.c | 68 -------------------------- 1 file changed, 68 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 805edbbfe7eb..6948824642dc 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -55,7 +55,6 @@ #define PARF_AXI_MSTR_WR_ADDR_HALT_V2 0x1a8 #define PARF_Q2A_FLUSH 0x1ac #define PARF_LTSSM 0x1b0 -#define PARF_SLV_DBI_ELBI 0x1b4 #define PARF_INT_ALL_STATUS 0x224 #define PARF_INT_ALL_CLEAR 0x228 #define PARF_INT_ALL_MASK 0x22c @@ -65,16 +64,6 @@ #define PARF_DBI_BASE_ADDR_V2_HI 0x354 #define PARF_SLV_ADDR_SPACE_SIZE_V2 0x358 #define PARF_SLV_ADDR_SPACE_SIZE_V2_HI 0x35c -#define PARF_BLOCK_SLV_AXI_WR_BASE 0x360 -#define PARF_BLOCK_SLV_AXI_WR_BASE_HI 0x364 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT 0x368 -#define PARF_BLOCK_SLV_AXI_WR_LIMIT_HI 0x36c -#define PARF_BLOCK_SLV_AXI_RD_BASE 0x370 -#define PARF_BLOCK_SLV_AXI_RD_BASE_HI 0x374 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT 0x378 -#define PARF_BLOCK_SLV_AXI_RD_LIMIT_HI 0x37c -#define PARF_ECAM_BASE 0x380 -#define PARF_ECAM_BASE_HI 0x384 #define PARF_NO_SNOOP_OVERRIDE 0x3d4 #define PARF_ATU_BASE_ADDR 0x634 #define PARF_ATU_BASE_ADDR_HI 0x638 @@ -98,7 +87,6 @@ /* PARF_SYS_CTRL register fields */ #define MAC_PHY_POWERDOWN_IN_P2_D_MUX_EN BIT(29) -#define PCIE_ECAM_BLOCKER_EN BIT(26) #define MST_WAKEUP_EN BIT(13) #define SLV_WAKEUP_EN BIT(12) #define MSTR_ACLK_CGC_DIS BIT(10) @@ -146,9 +134,6 @@ /* PARF_LTSSM register fields */ #define LTSSM_EN BIT(8) -/* PARF_SLV_DBI_ELBI */ -#define SLV_DBI_ELBI_ADDR_BASE GENMASK(11, 0) - /* PARF_INT_ALL_{STATUS/CLEAR/MASK} register fields */ #define PARF_INT_ALL_LINK_UP BIT(13) #define PARF_INT_MSI_DEV_0_7 GENMASK(30, 23) @@ -326,47 +311,6 @@ static void qcom_ep_reset_deassert(struct qcom_pcie *pcie) qcom_perst_assert(pcie, false); } -static void qcom_pci_config_ecam(struct dw_pcie_rp *pp) -{ - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct qcom_pcie *pcie = to_qcom_pcie(pci); - u64 addr, addr_end; - u32 val; - - writel_relaxed(lower_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE); - writel_relaxed(upper_32_bits(pci->dbi_phys_addr), pcie->parf + PARF_ECAM_BASE_HI); - - /* - * The only device on the root bus is a single Root Port. If we try to - * access any devices other than Device/Function 00.0 on Bus 0, the TLP - * will go outside of the controller to the PCI bus. But with CFG Shift - * Feature (ECAM) enabled in iATU, there is no guarantee that the - * response is going to be all F's. Hence, to make sure that the - * requester gets all F's response for accesses other than the Root - * Port, configure iATU to block the transactions starting from - * function 1 of the root bus to the end of the root bus (i.e., from - * dbi_base + 4KB to dbi_base + 1MB). - */ - addr = pci->dbi_phys_addr + SZ_4K; - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_WR_BASE_HI); - - writel_relaxed(lower_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE); - writel_relaxed(upper_32_bits(addr), pcie->parf + PARF_BLOCK_SLV_AXI_RD_BASE_HI); - - addr_end = pci->dbi_phys_addr + SZ_1M - 1; - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_WR_LIMIT_HI); - - writel_relaxed(lower_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT); - writel_relaxed(upper_32_bits(addr_end), pcie->parf + PARF_BLOCK_SLV_AXI_RD_LIMIT_HI); - - val = readl_relaxed(pcie->parf + PARF_SYS_CTRL); - val |= PCIE_ECAM_BLOCKER_EN; - writel_relaxed(val, pcie->parf + PARF_SYS_CTRL); -} - static int qcom_pcie_start_link(struct dw_pcie *pci) { struct qcom_pcie *pcie = to_qcom_pcie(pci); @@ -1320,7 +1264,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) { struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct qcom_pcie *pcie = to_qcom_pcie(pci); - u16 offset; int ret; qcom_ep_reset_assert(pcie); @@ -1329,17 +1272,6 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp) if (ret) return ret; - if (pp->ecam_enabled) { - /* - * Override ELBI when ECAM is enabled, as when ECAM is enabled, - * ELBI moves under the 'config' space. - */ - offset = FIELD_GET(SLV_DBI_ELBI_ADDR_BASE, readl(pcie->parf + PARF_SLV_DBI_ELBI)); - pci->elbi_base = pci->dbi_base + offset; - - qcom_pci_config_ecam(pp); - } - ret = qcom_pcie_phy_power_on(pcie); if (ret) goto err_deinit; From 19de7113bfac33ba92c004a9b510612bb745cfa0 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Thu, 16 Oct 2025 08:34:19 -0500 Subject: [PATCH 092/263] x86,fs/resctrl: Fix NULL pointer dereference with events force-disabled in mbm_event mode The following NULL pointer dereference is encountered on mount of resctrl fs after booting a system that supports assignable counters with the "rdt=!mbmtotal,!mbmlocal" kernel parameters: BUG: kernel NULL pointer dereference, address: 0000000000000008 RIP: 0010:mbm_cntr_get Call Trace: rdtgroup_assign_cntr_event rdtgroup_assign_cntrs rdt_get_tree Specifying the kernel parameter "rdt=!mbmtotal,!mbmlocal" effectively disables the legacy X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features and the MBM events they represent. This results in the per-domain MBM event related data structures to not be allocated during early initialization. resctrl fs initialization follows by implicitly enabling both MBM total and local events on a system that supports assignable counters (mbm_event mode), but this enabling occurs after the per-domain data structures have been created. After booting, resctrl fs assumes that an enabled event can access all its state. This results in NULL pointer dereference when resctrl attempts to access the un-allocated structures of an enabled event. Remove the late MBM event enabling from resctrl fs. This leaves a problem where the X86_FEATURE_CQM_MBM_TOTAL and X86_FEATURE_CQM_MBM_LOCAL features may be disabled while assignable counter (mbm_event) mode is enabled without any events to support. Switching between the "default" and "mbm_event" mode without any events is not practical. Create a dependency between the X86_FEATURE_{CQM_MBM_TOTAL,CQM_MBM_LOCAL} and X86_FEATURE_ABMC (assignable counter) hardware features. An x86 system that supports assignable counters now requires support of X86_FEATURE_CQM_MBM_TOTAL or X86_FEATURE_CQM_MBM_LOCAL. This ensures all needed MBM related data structures are created before use and that it is only possible to switch between "default" and "mbm_event" mode when the same events are available in both modes. This dependency does not exist in the hardware but this usage of these feature settings work for known systems. [ bp: Massage commit message. ] Fixes: 13390861b426e ("x86,fs/resctrl: Detect Assignable Bandwidth Monitoring feature details") Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://patch.msgid.link/a62e6ac063d0693475615edd213d5be5e55443e6.1760560934.git.babu.moger@amd.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 11 ++++++++++- fs/resctrl/monitor.c | 16 +++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2cd25a0d4637..fe1a2aa53c16 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -458,7 +458,16 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r) r->mon.mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS; } - if (rdt_cpu_has(X86_FEATURE_ABMC)) { + /* + * resctrl assumes a system that supports assignable counters can + * switch to "default" mode. Ensure that there is a "default" mode + * to switch to. This enforces a dependency between the independent + * X86_FEATURE_ABMC and X86_FEATURE_CQM_MBM_TOTAL/X86_FEATURE_CQM_MBM_LOCAL + * hardware features. + */ + if (rdt_cpu_has(X86_FEATURE_ABMC) && + (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) || + rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) { r->mon.mbm_cntr_assignable = true; cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx); r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1; diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c index 4076336fbba6..572a9925bd6c 100644 --- a/fs/resctrl/monitor.c +++ b/fs/resctrl/monitor.c @@ -1782,15 +1782,13 @@ int resctrl_mon_resource_init(void) mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID; if (r->mon.mbm_cntr_assignable) { - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID); - if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) - resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID); - mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; - mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & - (READS_TO_LOCAL_MEM | - READS_TO_LOCAL_S_MEM | - NON_TEMP_WRITE_TO_LOCAL_MEM); + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask; + if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) + mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask & + (READS_TO_LOCAL_MEM | + READS_TO_LOCAL_S_MEM | + NON_TEMP_WRITE_TO_LOCAL_MEM); r->mon.mbm_assign_on_mkdir = true; resctrl_file_fflags_init("num_mbm_cntrs", RFTYPE_MON_INFO | RFTYPE_RES_CACHE); From 789e46fbfca1875671717a20a916ca1a920268e4 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Oct 2025 12:51:35 +0300 Subject: [PATCH 093/263] drm/i915/panic: fix panic structure allocation memory leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separating the panic allocation from framebuffer allocation in commit 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") failed to deallocate the panic structure anywhere. The fix is two-fold. First, free the panic structure in intel_user_framebuffer_destroy() in the general case. Second, move the panic allocation later to intel_framebuffer_init() to not leak the panic structure in error paths (if any, now or later) between intel_framebuffer_alloc() and intel_framebuffer_init(). v2: Rebase Fixes: 729c5f7ffa83 ("drm/{i915,xe}/panic: move framebuffer allocation where it belongs") Cc: Jocelyn Falempe Cc: Maarten Lankhorst Reported-by: Michał Grzelak Suggested-by: Ville Syrjälä Tested-by: Michał Grzelak # v1 Reviewed-by: Jocelyn Falempe Link: https://lore.kernel.org/r/20251015095135.2183415-1-jani.nikula@intel.com Signed-off-by: Jani Nikula (cherry picked from commit 8f8ef09fcf6a3b00369bfc704e8f68d7474eca94) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_fb.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index b817ff44c043..c48384e58ea1 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -2117,6 +2117,7 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) intel_frontbuffer_put(intel_fb->frontbuffer); + kfree(intel_fb->panic); kfree(intel_fb); } @@ -2215,16 +2216,22 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct intel_display *display = to_intel_display(obj->dev); struct drm_framebuffer *fb = &intel_fb->base; u32 max_stride; - int ret = -EINVAL; + int ret; int i; + intel_fb->panic = intel_panic_alloc(); + if (!intel_fb->panic) + return -ENOMEM; + /* * intel_frontbuffer_get() must be done before * intel_fb_bo_framebuffer_init() to avoid set_tiling vs. addfb race. */ intel_fb->frontbuffer = intel_frontbuffer_get(obj); - if (!intel_fb->frontbuffer) - return -ENOMEM; + if (!intel_fb->frontbuffer) { + ret = -ENOMEM; + goto err_free_panic; + } ret = intel_fb_bo_framebuffer_init(fb, obj, mode_cmd); if (ret) @@ -2323,6 +2330,9 @@ int intel_framebuffer_init(struct intel_framebuffer *intel_fb, intel_fb_bo_framebuffer_fini(obj); err_frontbuffer_put: intel_frontbuffer_put(intel_fb->frontbuffer); +err_free_panic: + kfree(intel_fb->panic); + return ret; } @@ -2349,20 +2359,11 @@ intel_user_framebuffer_create(struct drm_device *dev, struct intel_framebuffer *intel_framebuffer_alloc(void) { struct intel_framebuffer *intel_fb; - struct intel_panic *panic; intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL); if (!intel_fb) return NULL; - panic = intel_panic_alloc(); - if (!panic) { - kfree(intel_fb); - return NULL; - } - - intel_fb->panic = panic; - return intel_fb; } From 39a9ed0fb6dac58547afdf9b6cb032d326a3698f Mon Sep 17 00:00:00 2001 From: Haofeng Li Date: Wed, 15 Oct 2025 14:17:53 +0800 Subject: [PATCH 094/263] timekeeping: Fix aux clocks sysfs initialization loop bound The loop in tk_aux_sysfs_init() uses `i <= MAX_AUX_CLOCKS` as the termination condition, which results in 9 iterations (i=0 to 8) when MAX_AUX_CLOCKS is defined as 8. However, the kernel is designed to support only up to 8 auxiliary clocks. This off-by-one error causes the creation of a 9th sysfs entry that exceeds the intended auxiliary clock range. Fix the loop bound to use `i < MAX_AUX_CLOCKS` to ensure exactly 8 auxiliary clock entries are created, matching the design specification. Fixes: 7b95663a3d96 ("timekeeping: Provide interface to control auxiliary clocks") Signed-off-by: Haofeng Li Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/tencent_2376993D9FC06A3616A4F981B3DE1C599607@qq.com --- kernel/time/timekeeping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b6974fce800c..3a4d3b2e3f74 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -3070,7 +3070,7 @@ static int __init tk_aux_sysfs_init(void) return -ENOMEM; } - for (int i = 0; i <= MAX_AUX_CLOCKS; i++) { + for (int i = 0; i < MAX_AUX_CLOCKS; i++) { char id[2] = { [0] = '0' + i, }; struct kobject *clk = kobject_create_and_add(id, auxo); From 5da6fb6356362c7eb40ed931b27abc31b3582950 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 10:33:06 +0100 Subject: [PATCH 095/263] cifs: Add a couple of missing smb3_rw_credits tracepoints Add missing smb3_rw_credits tracepoints to cifs_readv_callback() (for SMB1) to match those of SMB2/3. Signed-off-by: David Howells cc: Steve French cc: Paulo Alcantara cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifssmb.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 2881efcbe09a..7da194f29fef 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1311,6 +1311,8 @@ cifs_readv_callback(struct mid_q_entry *mid) .rreq_debug_id = rdata->rreq->debug_id, .rreq_debug_index = rdata->subreq.debug_index, }; + unsigned int rreq_debug_id = rdata->rreq->debug_id; + unsigned int subreq_debug_index = rdata->subreq.debug_index; cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n", __func__, mid->mid, mid->mid_state, rdata->result, @@ -1374,6 +1376,9 @@ cifs_readv_callback(struct mid_q_entry *mid) __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags); } + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value, + server->credits, server->in_flight, + 0, cifs_trace_rw_credits_read_response_clear); rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; @@ -1381,6 +1386,9 @@ cifs_readv_callback(struct mid_q_entry *mid) netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); + trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0, + server->credits, server->in_flight, + credits.value, cifs_trace_rw_credits_read_response_add); } /* cifs_async_readv - send an async write, and set up mid to handle result */ From 9a3c0d6834194b6e3cce4ffbb55f800c6cb58c86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 15 Oct 2025 19:07:25 +0200 Subject: [PATCH 096/263] drm/xe: Retain vma flags when recreating and splitting vmas for madvise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When splitting and restoring vmas for madvise, we only copied the XE_VMA_SYSTEM_ALLOCATOR flag. That meant we lost flags for read_only, dumpable and sparse (in case anyone would call madvise for the latter). Instead, define a mask of relevant flags and ensure all are replicated, To simplify this and make the code a bit less fragile, remove the conversion to VMA_CREATE flags and instead just pass around the gpuva flags after initial conversion from user-space. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Link: https://lore.kernel.org/r/20251015170726.178685-1-thomas.hellstrom@linux.intel.com (cherry picked from commit b3af8658ec70f2196190c66103478352286aba3b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_pt.c | 4 +- drivers/gpu/drm/xe/xe_vm.c | 86 +++++++++++--------------------- drivers/gpu/drm/xe/xe_vm_types.h | 9 +--- 3 files changed, 32 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index a1c88f9a6c76..07f96bda638a 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -2022,7 +2022,7 @@ static int op_prepare(struct xe_vm *vm, case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm) && !op->map.invalidate_on_bind) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma, @@ -2252,7 +2252,7 @@ static void op_commit(struct xe_vm *vm, switch (op->base.op) { case DRM_GPUVA_OP_MAP: if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) || - op->map.is_cpu_addr_mirror) + (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) break; bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence, diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f602b874e054..f7a0931eb66c 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -616,6 +616,12 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, vops->pt_update_ops[i].num_ops += inc_val; } +#define XE_VMA_CREATE_MASK ( \ + XE_VMA_READ_ONLY | \ + XE_VMA_DUMPABLE | \ + XE_VMA_SYSTEM_ALLOCATOR | \ + DRM_GPUVA_SPARSE) + static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) { @@ -628,8 +634,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, op->base.map.gem.offset = vma->gpuva.gem.offset; op->map.vma = vma; op->map.immediate = true; - op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE; - op->map.is_null = xe_vma_is_null(vma); + op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; } static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, @@ -932,11 +937,6 @@ static void xe_vma_free(struct xe_vma *vma) kfree(vma); } -#define VMA_CREATE_FLAG_READ_ONLY BIT(0) -#define VMA_CREATE_FLAG_IS_NULL BIT(1) -#define VMA_CREATE_FLAG_DUMPABLE BIT(2) -#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3) - static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_bo *bo, u64 bo_offset_or_userptr, @@ -947,11 +947,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, struct xe_vma *vma; struct xe_tile *tile; u8 id; - bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY); - bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL); - bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE); - bool is_cpu_addr_mirror = - (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR); + bool is_null = (flags & DRM_GPUVA_SPARSE); + bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); xe_assert(vm->xe, start < end); xe_assert(vm->xe, end < vm->size); @@ -972,10 +969,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, if (!vma) return ERR_PTR(-ENOMEM); - if (is_cpu_addr_mirror) - vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR; - if (is_null) - vma->gpuva.flags |= DRM_GPUVA_SPARSE; if (bo) vma->gpuva.gem.obj = &bo->ttm.base; } @@ -986,10 +979,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm, vma->gpuva.vm = &vm->gpuvm; vma->gpuva.va.addr = start; vma->gpuva.va.range = end - start + 1; - if (read_only) - vma->gpuva.flags |= XE_VMA_READ_ONLY; - if (dumpable) - vma->gpuva.flags |= XE_VMA_DUMPABLE; + vma->gpuva.flags = flags; for_each_tile(tile, vm->xe, id) vma->tile_mask |= 0x1 << id; @@ -2272,12 +2262,14 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, if (__op->op == DRM_GPUVA_OP_MAP) { op->map.immediate = flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; - op->map.read_only = - flags & DRM_XE_VM_BIND_FLAG_READONLY; - op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; - op->map.is_cpu_addr_mirror = flags & - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; - op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_READONLY) + op->map.vma_flags |= XE_VMA_READ_ONLY; + if (flags & DRM_XE_VM_BIND_FLAG_NULL) + op->map.vma_flags |= DRM_GPUVA_SPARSE; + if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; + if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) + op->map.vma_flags |= XE_VMA_DUMPABLE; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -2590,14 +2582,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, .pat_index = op->map.pat_index, }; - flags |= op->map.read_only ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->map.is_null ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->map.dumpable ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= op->map.is_cpu_addr_mirror ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; + flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; vma = new_vma(vm, &op->base.map, &default_attr, flags); @@ -2606,7 +2591,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->map.vma = vma; if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && - !op->map.is_cpu_addr_mirror) || + !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || op->map.invalidate_on_bind) xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1); @@ -2637,18 +2622,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, op->remap.start = xe_vma_start(old); op->remap.range = xe_vma_size(old); - flags |= op->base.remap.unmap->va->flags & - XE_VMA_READ_ONLY ? - VMA_CREATE_FLAG_READ_ONLY : 0; - flags |= op->base.remap.unmap->va->flags & - DRM_GPUVA_SPARSE ? - VMA_CREATE_FLAG_IS_NULL : 0; - flags |= op->base.remap.unmap->va->flags & - XE_VMA_DUMPABLE ? - VMA_CREATE_FLAG_DUMPABLE : 0; - flags |= xe_vma_is_cpu_addr_mirror(old) ? - VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0; - + flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; if (op->base.remap.prev) { vma = new_vma(vm, op->base.remap.prev, &old->attr, flags); @@ -4212,7 +4186,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, struct xe_vma_ops vops; struct drm_gpuva_ops *ops = NULL; struct drm_gpuva_op *__op; - bool is_cpu_addr_mirror = false; + unsigned int vma_flags = 0; bool remap_op = false; struct xe_vma_mem_attr tmp_attr; u16 default_pat; @@ -4242,15 +4216,17 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, vma = gpuva_to_vma(op->base.unmap.va); XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_REMAP) { vma = gpuva_to_vma(op->base.remap.unmap->va); default_pat = vma->attr.default_pat_index; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { - op->map.is_cpu_addr_mirror = true; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; op->map.pat_index = default_pat; } } else { @@ -4259,11 +4235,7 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, xe_assert(vm->xe, !remap_op); xe_assert(vm->xe, xe_vma_has_no_bo(vma)); remap_op = true; - - if (xe_vma_is_cpu_addr_mirror(vma)) - is_cpu_addr_mirror = true; - else - is_cpu_addr_mirror = false; + vma_flags = vma->gpuva.flags; } if (__op->op == DRM_GPUVA_OP_MAP) { @@ -4272,10 +4244,10 @@ static int xe_vm_alloc_vma(struct xe_vm *vm, /* * In case of madvise ops DRM_GPUVA_OP_MAP is * always after DRM_GPUVA_OP_REMAP, so ensure - * we assign op->map.is_cpu_addr_mirror true - * if REMAP is for xe_vma_is_cpu_addr_mirror vma + * to propagate the flags from the vma we're + * unmapping. */ - op->map.is_cpu_addr_mirror = is_cpu_addr_mirror; + op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; } } print_op(vm->xe, __op); diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index 413353e1c225..a3b422b27ae8 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -345,17 +345,10 @@ struct xe_vm { struct xe_vma_op_map { /** @vma: VMA to map */ struct xe_vma *vma; + unsigned int vma_flags; /** @immediate: Immediate bind */ bool immediate; /** @read_only: Read only */ - bool read_only; - /** @is_null: is NULL binding */ - bool is_null; - /** @is_cpu_addr_mirror: is CPU address mirror binding */ - bool is_cpu_addr_mirror; - /** @dumpable: whether BO is dumped on GPU hang */ - bool dumpable; - /** @invalidate: invalidate the VMA before bind */ bool invalidate_on_bind; /** @pat_index: The pat index to use for this operation. */ u16 pat_index; From ce831bffcef3d8f9691b5537d74ffa1b1256c017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Wed, 15 Oct 2025 19:07:26 +0200 Subject: [PATCH 097/263] drm/xe/uapi: Hide the madvise autoreset behind a VM_BIND flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The madvise implementation currently resets the SVM madvise if the underlying CPU map is unmapped. This is in an attempt to mimic the CPU madvise behaviour. However, it's not clear that this is a desired behaviour since if the end app user relies on it for malloc()ed objects or stack objects, it may not work as intended. Instead of having the autoreset functionality being a direct application-facing implicit UAPI, make the UMD explicitly choose this behaviour if it wants to expose it by introducing DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET, and add a semantics description. v2: - Kerneldoc fixes. Fix a commit log message. Fixes: a2eb8aec3ebe ("drm/xe: Reset VMA attributes to default in SVM garbage collector") Cc: Matthew Brost Cc: Himal Prasad Ghimiray Cc: "Falkowski, John" Cc: "Mrozek, Michal" Signed-off-by: Thomas Hellström Reviewed-by: Himal Prasad Ghimiray Link: https://lore.kernel.org/r/20251015170726.178685-2-thomas.hellstrom@linux.intel.com (cherry picked from commit 59a2d3f38ab23cce4cd9f0c4a5e08fdfe9e67ae7) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 5 +++++ drivers/gpu/drm/xe/xe_vm.c | 12 +++++++++--- drivers/gpu/drm/xe/xe_vm_types.h | 1 + include/uapi/drm/xe_drm.h | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index da2a412f80c0..129e7818565c 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -302,6 +302,11 @@ static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 if (!vma) return -EINVAL; + if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) { + drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n"); + return 0; + } + if (xe_vma_has_default_mem_attrs(vma)) return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index f7a0931eb66c..63c65e3d207b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -620,7 +620,8 @@ static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, XE_VMA_READ_ONLY | \ XE_VMA_DUMPABLE | \ XE_VMA_SYSTEM_ALLOCATOR | \ - DRM_GPUVA_SPARSE) + DRM_GPUVA_SPARSE | \ + XE_VMA_MADV_AUTORESET) static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, u8 tile_mask) @@ -2270,6 +2271,8 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) op->map.vma_flags |= XE_VMA_DUMPABLE; + if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) + op->map.vma_flags |= XE_VMA_MADV_AUTORESET; op->map.pat_index = pat_index; op->map.invalidate_on_bind = __xe_vm_needs_clear_scratch_pages(vm, flags); @@ -3253,7 +3256,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); DRM_XE_VM_BIND_FLAG_NULL | \ DRM_XE_VM_BIND_FLAG_DUMPABLE | \ DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ - DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) + DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ + DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) #ifdef TEST_VM_OPS_ERROR #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) @@ -3368,7 +3372,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && !(BIT(prefetch_region) & xe->info.mem_region_mask))) || XE_IOCTL_DBG(xe, obj && - op == DRM_XE_VM_BIND_OP_UNMAP)) { + op == DRM_XE_VM_BIND_OP_UNMAP) || + XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && + (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { err = -EINVAL; goto free_bind_ops; } diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h index a3b422b27ae8..d6e2a0fdd4b3 100644 --- a/drivers/gpu/drm/xe/xe_vm_types.h +++ b/drivers/gpu/drm/xe/xe_vm_types.h @@ -46,6 +46,7 @@ struct xe_vm_pgtable_update_op; #define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7) #define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8) #define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9) +#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10) /** * struct xe_vma_mem_attr - memory attributes associated with vma diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 40ff19f52a8d..517489a7ec60 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1013,6 +1013,20 @@ struct drm_xe_vm_destroy { * valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address * mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO * handle MBZ, and the BO offset MBZ. + * - %DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET - Can be used in combination with + * %DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR to reset madvises when the underlying + * CPU address space range is unmapped (typically with munmap(2) or brk(2)). + * The madvise values set with &DRM_IOCTL_XE_MADVISE are reset to the values + * that were present immediately after the &DRM_IOCTL_XE_VM_BIND. + * The reset GPU virtual address range is the intersection of the range bound + * using &DRM_IOCTL_XE_VM_BIND and the virtual CPU address space range + * unmapped. + * This functionality is present to mimic the behaviour of CPU address space + * madvises set using madvise(2), which are typically reset on unmap. + * Note: free(3) may or may not call munmap(2) and/or brk(2), and may thus + * not invoke autoreset. Neither will stack variables going out of scope. + * Therefore it's recommended to always explicitly reset the madvises when + * freeing the memory backing a region used in a &DRM_IOCTL_XE_MADVISE call. * * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be: * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in @@ -1119,6 +1133,7 @@ struct drm_xe_vm_bind_op { #define DRM_XE_VM_BIND_FLAG_DUMPABLE (1 << 3) #define DRM_XE_VM_BIND_FLAG_CHECK_PXP (1 << 4) #define DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR (1 << 5) +#define DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET (1 << 6) /** @flags: Bind flags */ __u32 flags; From 914f377075d646b4695a7868ba090f4c714dfd4b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 13 Oct 2025 12:08:29 +0900 Subject: [PATCH 098/263] xfs: Improve CONFIG_XFS_RT Kconfig help Improve the description of the XFS_RT configuration option to document that this option is required for zoned block devices. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index 8930d5254e1d..d66d517c99a9 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -119,6 +119,15 @@ config XFS_RT See the xfs man page in section 5 for additional information. + This option is mandatory to support zoned block devices. For these + devices, the realtime subvolume must be backed by a zoned block + device and a regular block device used as the main device (for + metadata). If the zoned block device is a host-managed SMR hard-disk + containing conventional zones at the beginning of its address space, + XFS will use the disk conventional zones as the main device and the + remaining sequential write required zones as the backing storage for + the realtime subvolume. + If unsure, say N. config XFS_DRAIN_INTENTS From b00bcb190eef35ae4da3c424b8a72f287e69f650 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 14 Oct 2025 13:19:45 +0900 Subject: [PATCH 099/263] xfs: do not tightly pack-write large files When using a zoned realtime device, tightly packing of data blocks belonging to multiple closed files into the same realtime group (RTG) is very efficient at improving write performance. This is especially true with SMR HDDs as this can reduce, and even suppress, disk head seeks. However, such tight packing does not make sense for large files that require at least a full RTG. If tight packing placement is applied for such files, the VM writeback thread switching between inodes result in the large files to be fragmented, thus increasing the garbage collection penalty later when the RTG needs to be reclaimed. This problem can be avoided with a simple heuristic: if the size of the inode being written back is at least equal to the RTG size, do not use tight-packing. Modify xfs_zoned_pack_tight() to always return false in this case. With this change, a multi-writer workload writing files of 256 MB on a file system backed by an SMR HDD with 256 MB zone size as a realtime device sees all files occupying exactly one RTG (i.e. one device zone), thus completely removing the heavy fragmentation observed without this change. Signed-off-by: Damien Le Moal Reviewed-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_alloc.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 1147bacb2da8..1b462cd5d8fa 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -614,14 +614,25 @@ static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip) } /* - * Try to pack inodes that are written back after they were closed tight instead - * of trying to open new zones for them or spread them to the least recently - * used zone. This optimizes the data layout for workloads that untar or copy - * a lot of small files. Right now this does not separate multiple such + * Try to tightly pack small files that are written back after they were closed + * instead of trying to open new zones for them or spread them to the least + * recently used zone. This optimizes the data layout for workloads that untar + * or copy a lot of small files. Right now this does not separate multiple such * streams. */ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip) { + struct xfs_mount *mp = ip->i_mount; + size_t zone_capacity = + XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].blocks); + + /* + * Do not pack write files that are already using a full zone to avoid + * fragmentation. + */ + if (i_size_read(VFS_I(ip)) >= zone_capacity) + return false; + return !inode_is_open_for_write(VFS_I(ip)) && !(ip->i_diflags & XFS_DIFLAG_APPEND); } From f5caeb3689ea2d8a8c0790d9eea68b63e8f15496 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 13 Oct 2025 10:48:46 +0200 Subject: [PATCH 100/263] xfs: XFS_ONLINE_SCRUB_STATS should depend on DEBUG_FS Currently, XFS_ONLINE_SCRUB_STATS selects DEBUG_FS. However, DEBUG_FS is meant for debugging, and people may want to disable it on production systems. Since commit 0ff51a1fd786f47b ("xfs: enable online fsck by default in Kconfig")), XFS_ONLINE_SCRUB_STATS is enabled by default, forcing DEBUG_FS enabled too. Fix this by replacing the selection of DEBUG_FS by a dependency on DEBUG_FS, which is what most other options controlling the gathering and exposing of statistics do. Signed-off-by: Geert Uytterhoeven Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig index d66d517c99a9..b99da294e9a3 100644 --- a/fs/xfs/Kconfig +++ b/fs/xfs/Kconfig @@ -165,7 +165,7 @@ config XFS_ONLINE_SCRUB_STATS bool "XFS online metadata check usage data collection" default y depends on XFS_ONLINE_SCRUB - select DEBUG_FS + depends on DEBUG_FS help If you say Y here, the kernel will gather usage data about the online metadata check subsystem. This includes the number From 179753aa5b7890b311968c033d08f558f0a7be21 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:48 +0200 Subject: [PATCH 101/263] drm/panic: Fix drawing the logo on a small narrow screen If the logo width is bigger than the framebuffer width, and the height is big enough to hold the logo and the message, it will draw at x coordinate that are higher than the width, and ends up in a corrupted image. Fixes: 4b570ac2eb54 ("drm/rect: Add drm_rect_overlap()") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-2-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 1d6312fa1429..23ba791c6131 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -429,6 +429,9 @@ static void drm_panic_logo_rect(struct drm_rect *rect, const struct font_desc *f static void drm_panic_logo_draw(struct drm_scanout_buffer *sb, struct drm_rect *rect, const struct font_desc *font, u32 fg_color) { + if (rect->x2 > sb->width || rect->y2 > sb->height) + return; + if (logo_mono) drm_panic_blit(sb, rect, logo_mono->data, DIV_ROUND_UP(drm_rect_width(rect), 8), 1, fg_color); From cfa56e0a0e9b259077b0cb88b431e37dc9a67dee Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:49 +0200 Subject: [PATCH 102/263] drm/panic: Fix overlap between qr code and logo The borders of the qr code was not taken into account to check if it overlap with the logo, leading to the logo being partially covered. Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-3-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 23ba791c6131..179cbf21f22d 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -749,7 +749,7 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) /* Fill with the background color, and draw text on top */ drm_panic_fill(sb, &r_screen, bg_color); - if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr)) + if (!drm_rect_overlap(&r_logo, &r_msg) && !drm_rect_overlap(&r_logo, &r_qr_canvas)) drm_panic_logo_draw(sb, &r_logo, font, fg_color); draw_txt_rectangle(sb, font, panic_msg, panic_msg_lines, true, &r_msg, fg_color); From 4fcffb5e5c8c0c8e2ad9c99a22305a0afbecc294 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:50 +0200 Subject: [PATCH 103/263] drm/panic: Fix qr_code, ensure vmargin is positive Depending on qr_code size and screen size, the vertical margin can be negative, that means there is not enough room to draw the qr_code. So abort early, to avoid a segfault by trying to draw at negative coordinates. Fixes: cb5164ac43d0f ("drm/panic: Add a QR code panic screen") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-4-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 179cbf21f22d..281bb2dabf81 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -736,7 +736,10 @@ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) pr_debug("QR width %d and scale %d\n", qr_width, scale); r_qr_canvas = DRM_RECT_INIT(0, 0, qr_canvas_width * scale, qr_canvas_width * scale); - v_margin = (sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg)) / 5; + v_margin = sb->height - drm_rect_height(&r_qr_canvas) - drm_rect_height(&r_msg); + if (v_margin < 0) + return -ENOSPC; + v_margin /= 5; drm_rect_translate(&r_qr_canvas, (sb->width - r_qr_canvas.x2) / 2, 2 * v_margin); r_qr = DRM_RECT_INIT(r_qr_canvas.x1 + QR_MARGIN * scale, r_qr_canvas.y1 + QR_MARGIN * scale, From e9b36fe0630046e61224216dc92513a69f72b5f0 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:51 +0200 Subject: [PATCH 104/263] drm/panic: Fix kmsg text drawing rectangle The rectangle height was larger than the screen size. This has no real impact. Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-5-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 281bb2dabf81..69be9d835ccf 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -480,7 +480,7 @@ static int draw_line_with_wrap(struct drm_scanout_buffer *sb, const struct font_ struct drm_panic_line *line, int yoffset, u32 fg_color) { int chars_per_row = sb->width / font->width; - struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, sb->height); + struct drm_rect r_txt = DRM_RECT_INIT(0, yoffset, sb->width, font->height); struct drm_panic_line line_wrap; if (line->len > chars_per_row) { From 2e337dd278c6c38982b520c309f36e0f88696e6e Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:52 +0200 Subject: [PATCH 105/263] drm/panic: Fix divide by 0 if the screen width < font width In the unlikely case that the screen is tiny, and smaller than the font width, it leads to a divide by 0: draw_line_with_wrap() chars_per_row = sb->width / font->width = 0 line_wrap.len = line->len % chars_per_row; This will trigger a divide by 0 Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-6-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 69be9d835ccf..bc5158683b2b 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -523,7 +523,7 @@ static void draw_panic_static_kmsg(struct drm_scanout_buffer *sb) struct drm_panic_line line; int yoffset; - if (!font) + if (!font || font->width > sb->width) return; yoffset = sb->height - font->height - (sb->height % font->height) / 2; From 23437509a69476d4f896891032d62ac868731668 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 9 Oct 2025 14:24:53 +0200 Subject: [PATCH 106/263] drm/panic: Fix 24bit pixel crossing page boundaries When using page list framebuffer, and using RGB888 format, some pixels can cross the page boundaries, and this case was not handled, leading to writing 1 or 2 bytes on the next virtual address. Add a check and a specific function to handle this case. Fixes: c9ff2808790f0 ("drm/panic: Add support to scanout buffer as array of pages") Reviewed-by: Javier Martinez Canillas Link: https://lore.kernel.org/r/20251009122955.562888-7-jfalempe@redhat.com Signed-off-by: Jocelyn Falempe --- drivers/gpu/drm/drm_panic.c | 46 +++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index bc5158683b2b..d4b6ea42db0f 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -174,6 +174,33 @@ static void drm_panic_write_pixel24(void *vaddr, unsigned int offset, u32 color) *p = color & 0xff; } +/* + * Special case if the pixel crosses page boundaries + */ +static void drm_panic_write_pixel24_xpage(void *vaddr, struct page *next_page, + unsigned int offset, u32 color) +{ + u8 *vaddr2; + u8 *p = vaddr + offset; + + vaddr2 = kmap_local_page_try_from_panic(next_page); + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 1) + p = vaddr2; + + *p++ = color & 0xff; + color >>= 8; + + if (offset == PAGE_SIZE - 2) + p = vaddr2; + + *p = color & 0xff; + kunmap_local(vaddr2); +} + static void drm_panic_write_pixel32(void *vaddr, unsigned int offset, u32 color) { u32 *p = vaddr + offset; @@ -231,7 +258,14 @@ static void drm_panic_blit_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - if (vaddr) + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, fg32); + else drm_panic_write_pixel(vaddr, offset, fg32, cpp); } } @@ -321,7 +355,15 @@ static void drm_panic_fill_page(struct page **pages, unsigned int dpitch, page = new_page; vaddr = kmap_local_page_try_from_panic(pages[page]); } - drm_panic_write_pixel(vaddr, offset, color, cpp); + if (!vaddr) + continue; + + // Special case for 24bit, as a pixel might cross page boundaries + if (cpp == 3 && offset + 3 > PAGE_SIZE) + drm_panic_write_pixel24_xpage(vaddr, pages[page + 1], + offset, color); + else + drm_panic_write_pixel(vaddr, offset, color, cpp); } } if (vaddr) From a8c861f401b4b2f8feda282abff929fa91c1f73a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 15 Oct 2025 15:29:30 +0900 Subject: [PATCH 107/263] xfs: avoid busy loops in GCD When GCD has no new work to handle, but read, write or reset commands are outstanding, it currently busy loops, which is a bit suboptimal, and can lead to softlockup warnings in case of stuck commands. Change the code so that the task state is only set to running when work is performed, which looks a bit tricky due to the design of the reading/writing/resetting lists that contain both in-flight and finished commands. Fixes: 080d01c41d44 ("xfs: implement zoned garbage collection") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_zone_gc.c | 81 +++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c index 064cd1a857a0..109877d9a6bf 100644 --- a/fs/xfs/xfs_zone_gc.c +++ b/fs/xfs/xfs_zone_gc.c @@ -491,21 +491,6 @@ xfs_zone_gc_select_victim( struct xfs_rtgroup *victim_rtg = NULL; unsigned int bucket; - if (xfs_is_shutdown(mp)) - return false; - - if (iter->victim_rtg) - return true; - - /* - * Don't start new work if we are asked to stop or park. - */ - if (kthread_should_stop() || kthread_should_park()) - return false; - - if (!xfs_zoned_need_gc(mp)) - return false; - spin_lock(&zi->zi_used_buckets_lock); for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) { victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket); @@ -975,6 +960,27 @@ xfs_zone_gc_reset_zones( } while (next); } +static bool +xfs_zone_gc_should_start_new_work( + struct xfs_zone_gc_data *data) +{ + if (xfs_is_shutdown(data->mp)) + return false; + if (!xfs_zone_gc_space_available(data)) + return false; + + if (!data->iter.victim_rtg) { + if (kthread_should_stop() || kthread_should_park()) + return false; + if (!xfs_zoned_need_gc(data->mp)) + return false; + if (!xfs_zone_gc_select_victim(data)) + return false; + } + + return true; +} + /* * Handle the work to read and write data for GC and to reset the zones, * including handling all completions. @@ -982,7 +988,7 @@ xfs_zone_gc_reset_zones( * Note that the order of the chunks is preserved so that we don't undo the * optimal order established by xfs_zone_gc_query(). */ -static bool +static void xfs_zone_gc_handle_work( struct xfs_zone_gc_data *data) { @@ -996,30 +1002,22 @@ xfs_zone_gc_handle_work( zi->zi_reset_list = NULL; spin_unlock(&zi->zi_reset_list_lock); - if (!xfs_zone_gc_select_victim(data) || - !xfs_zone_gc_space_available(data)) { - if (list_empty(&data->reading) && - list_empty(&data->writing) && - list_empty(&data->resetting) && - !reset_list) - return false; - } - - __set_current_state(TASK_RUNNING); - try_to_freeze(); - - if (reset_list) + if (reset_list) { + set_current_state(TASK_RUNNING); xfs_zone_gc_reset_zones(data, reset_list); + } list_for_each_entry_safe(chunk, next, &data->resetting, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_reset(chunk); } list_for_each_entry_safe(chunk, next, &data->writing, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_finish_chunk(chunk); } @@ -1027,15 +1025,18 @@ xfs_zone_gc_handle_work( list_for_each_entry_safe(chunk, next, &data->reading, entry) { if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE) break; + set_current_state(TASK_RUNNING); xfs_zone_gc_write_chunk(chunk); } blk_finish_plug(&plug); - blk_start_plug(&plug); - while (xfs_zone_gc_start_chunk(data)) - ; - blk_finish_plug(&plug); - return true; + if (xfs_zone_gc_should_start_new_work(data)) { + set_current_state(TASK_RUNNING); + blk_start_plug(&plug); + while (xfs_zone_gc_start_chunk(data)) + ; + blk_finish_plug(&plug); + } } /* @@ -1059,8 +1060,18 @@ xfs_zoned_gcd( for (;;) { set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE); xfs_set_zonegc_running(mp); - if (xfs_zone_gc_handle_work(data)) + + xfs_zone_gc_handle_work(data); + + /* + * Only sleep if nothing set the state to running. Else check for + * work again as someone might have queued up more work and woken + * us in the meantime. + */ + if (get_current_state() == TASK_RUNNING) { + try_to_freeze(); continue; + } if (list_empty(&data->reading) && list_empty(&data->writing) && From ca3d643a970139f5456f90dd555a0955752d70cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Oct 2025 05:55:41 +0200 Subject: [PATCH 108/263] xfs: cache open zone in inode->i_private The MRU cache for open zones is unfortunately still not ideal, as it can time out pretty easily when doing heavy I/O to hard disks using up most or all open zones. One option would be to just increase the timeout, but while looking into that I realized we're just better off caching it indefinitely as there is no real downside to that once we don't hold a reference to the cache open zone. So switch the open zone to RCU freeing, and then stash the last used open zone into inode->i_private. This helps to significantly reduce fragmentation by keeping I/O localized to zones for workloads that write using many open files to HDD. Fixes: 4e4d52075577 ("xfs: add the zoned space allocator") Signed-off-by: Christoph Hellwig Reviewed-by: Hans Holmberg Reviewed-by: Damien Le Moal Tested-by: Damien Le Moal Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_mount.h | 1 - fs/xfs/xfs_super.c | 6 ++ fs/xfs/xfs_zone_alloc.c | 129 ++++++++++++++-------------------------- fs/xfs/xfs_zone_priv.h | 2 + 4 files changed, 53 insertions(+), 85 deletions(-) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index f046d1215b04..b871dfde372b 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -236,7 +236,6 @@ typedef struct xfs_mount { bool m_update_sb; /* sb needs update in mount */ unsigned int m_max_open_zones; unsigned int m_zonegc_low_space; - struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */ /* max_atomic_write mount option value */ unsigned long long m_awu_max_bytes; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e85a156dc17d..464ae1e657d9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -786,6 +786,12 @@ xfs_fs_evict_inode( truncate_inode_pages_final(&inode->i_data); clear_inode(inode); + + if (IS_ENABLED(CONFIG_XFS_RT) && + S_ISREG(inode->i_mode) && inode->i_private) { + xfs_open_zone_put(inode->i_private); + inode->i_private = NULL; + } } static void diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c index 1b462cd5d8fa..23cdab4515bb 100644 --- a/fs/xfs/xfs_zone_alloc.c +++ b/fs/xfs/xfs_zone_alloc.c @@ -26,14 +26,22 @@ #include "xfs_trace.h" #include "xfs_mru_cache.h" +static void +xfs_open_zone_free_rcu( + struct callback_head *cb) +{ + struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu); + + xfs_rtgroup_rele(oz->oz_rtg); + kfree(oz); +} + void xfs_open_zone_put( struct xfs_open_zone *oz) { - if (atomic_dec_and_test(&oz->oz_ref)) { - xfs_rtgroup_rele(oz->oz_rtg); - kfree(oz); - } + if (atomic_dec_and_test(&oz->oz_ref)) + call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu); } static inline uint32_t @@ -756,98 +764,55 @@ xfs_mark_rtg_boundary( ioend->io_flags |= IOMAP_IOEND_BOUNDARY; } -/* - * Cache the last zone written to for an inode so that it is considered first - * for subsequent writes. - */ -struct xfs_zone_cache_item { - struct xfs_mru_cache_elem mru; - struct xfs_open_zone *oz; -}; - -static inline struct xfs_zone_cache_item * -xfs_zone_cache_item(struct xfs_mru_cache_elem *mru) -{ - return container_of(mru, struct xfs_zone_cache_item, mru); -} - -static void -xfs_zone_cache_free_func( - void *data, - struct xfs_mru_cache_elem *mru) -{ - struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru); - - xfs_open_zone_put(item->oz); - kfree(item); -} - /* * Check if we have a cached last open zone available for the inode and * if yes return a reference to it. */ static struct xfs_open_zone * -xfs_cached_zone( - struct xfs_mount *mp, - struct xfs_inode *ip) +xfs_get_cached_zone( + struct xfs_inode *ip) { - struct xfs_mru_cache_elem *mru; - struct xfs_open_zone *oz; + struct xfs_open_zone *oz; - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (!mru) - return NULL; - oz = xfs_zone_cache_item(mru)->oz; + rcu_read_lock(); + oz = VFS_I(ip)->i_private; if (oz) { /* * GC only steals open zones at mount time, so no GC zones * should end up in the cache. */ ASSERT(!oz->oz_is_gc); - ASSERT(atomic_read(&oz->oz_ref) > 0); - atomic_inc(&oz->oz_ref); + if (!atomic_inc_not_zero(&oz->oz_ref)) + oz = NULL; } - xfs_mru_cache_done(mp->m_zone_cache); + rcu_read_unlock(); + return oz; } /* - * Update the last used zone cache for a given inode. + * Stash our zone in the inode so that is is reused for future allocations. * - * The caller must have a reference on the open zone. + * The open_zone structure will be pinned until either the inode is freed or + * until the cached open zone is replaced with a different one because the + * current one was full when we tried to use it. This means we keep any + * open zone around forever as long as any inode that used it for the last + * write is cached, which slightly increases the memory use of cached inodes + * that were every written to, but significantly simplifies the cached zone + * lookup. Because the open_zone is clearly marked as full when all data + * in the underlying RTG was written, the caching is always safe. */ static void -xfs_zone_cache_create_association( - struct xfs_inode *ip, - struct xfs_open_zone *oz) +xfs_set_cached_zone( + struct xfs_inode *ip, + struct xfs_open_zone *oz) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_zone_cache_item *item = NULL; - struct xfs_mru_cache_elem *mru; + struct xfs_open_zone *old_oz; - ASSERT(atomic_read(&oz->oz_ref) > 0); atomic_inc(&oz->oz_ref); - - mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino); - if (mru) { - /* - * If we have an association already, update it to point to the - * new zone. - */ - item = xfs_zone_cache_item(mru); - xfs_open_zone_put(item->oz); - item->oz = oz; - xfs_mru_cache_done(mp->m_zone_cache); - return; - } - - item = kmalloc(sizeof(*item), GFP_KERNEL); - if (!item) { - xfs_open_zone_put(oz); - return; - } - item->oz = oz; - xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru); + old_oz = xchg(&VFS_I(ip)->i_private, oz); + if (old_oz) + xfs_open_zone_put(old_oz); } static void @@ -891,15 +856,14 @@ xfs_zone_alloc_and_submit( * the inode is still associated with a zone and use that if so. */ if (!*oz) - *oz = xfs_cached_zone(mp, ip); + *oz = xfs_get_cached_zone(ip); if (!*oz) { select_zone: *oz = xfs_select_zone(mp, write_hint, pack_tight); if (!*oz) goto out_error; - - xfs_zone_cache_create_association(ip, *oz); + xfs_set_cached_zone(ip, *oz); } alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size), @@ -977,6 +941,12 @@ xfs_free_open_zones( xfs_open_zone_put(oz); } spin_unlock(&zi->zi_open_zones_lock); + + /* + * Wait for all open zones to be freed so that they drop the group + * references: + */ + rcu_barrier(); } struct xfs_init_zones { @@ -1290,14 +1260,6 @@ xfs_mount_zones( error = xfs_zone_gc_mount(mp); if (error) goto out_free_zone_info; - - /* - * Set up a mru cache to track inode to open zone for data placement - * purposes. The magic values for group count and life time is the - * same as the defaults for file streams, which seems sane enough. - */ - xfs_mru_cache_create(&mp->m_zone_cache, mp, - 5000, 10, xfs_zone_cache_free_func); return 0; out_free_zone_info: @@ -1311,5 +1273,4 @@ xfs_unmount_zones( { xfs_zone_gc_unmount(mp); xfs_free_zone_info(mp->m_zone_info); - xfs_mru_cache_destroy(mp->m_zone_cache); } diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h index 35e6de3d25ed..4322e26dd99a 100644 --- a/fs/xfs/xfs_zone_priv.h +++ b/fs/xfs/xfs_zone_priv.h @@ -44,6 +44,8 @@ struct xfs_open_zone { * the life time of an open zone. */ struct xfs_rtgroup *oz_rtg; + + struct rcu_head oz_rcu; }; /* From 0f41997b1b2b769b73415512d2afaae80630e4fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Oct 2025 12:12:48 +0200 Subject: [PATCH 109/263] xfs: don't use __GFP_NOFAIL in xfs_init_fs_context With enough debug options enabled, struct xfs_mount is larger than 4k and thus NOFAIL allocations won't work for it. xfs_init_fs_context is early in the mount process, and if we really are out of memory there we'd better give up ASAP anyway. Fixes: 7b77b46a6137 ("xfs: use kmem functions for struct xfs_mount") Reported-by: syzbot+359a67b608de1ef72f65@syzkaller.appspotmail.com Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 464ae1e657d9..9d51186b24dd 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2227,7 +2227,7 @@ xfs_init_fs_context( struct xfs_mount *mp; int i; - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL); + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; From c4d35e635f3a65aec291a6045cae8c99cede5bba Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:44 +0900 Subject: [PATCH 110/263] gpio: 104-idio-16: Define maximum valid register address offset Attempting to load the 104-idio-16 module fails during regmap initialization with a return error -EINVAL. This is a result of the regmap cache failing initialization. Set the idio_16_regmap_config max_register member to fix this failure. Fixes: 2c210c9a34a3 ("gpio: 104-idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Mark Cave-Ayland Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-1-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-104-idio-16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c index ffe7e1cb6b23..fe5c10cd5c32 100644 --- a/drivers/gpio/gpio-104-idio-16.c +++ b/drivers/gpio/gpio-104-idio-16.c @@ -59,6 +59,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x5, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, From d37623132a6347b4ab9e2179eb3f2fa77863c364 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:45 +0900 Subject: [PATCH 111/263] gpio: pci-idio-16: Define maximum valid register address offset Attempting to load the pci-idio-16 module fails during regmap initialization with a return error -EINVAL. This is a result of the regmap cache failing initialization. Set the idio_16_regmap_config max_register member to fix this failure. Fixes: 73d8f3efc5c2 ("gpio: pci-idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Mark Cave-Ayland Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-2-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pci-idio-16.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 476cea1b5ed7..9d28ca8e1d6f 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -41,6 +41,7 @@ static const struct regmap_config idio_16_regmap_config = { .reg_stride = 1, .val_bits = 8, .io_port = true, + .max_register = 0x7, .wr_table = &idio_16_wr_table, .rd_table = &idio_16_rd_table, .volatile_table = &idio_16_rd_table, From 876f0d43af78639790bee0e57b39d498ae35adcf Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 20 Oct 2025 15:41:24 +0100 Subject: [PATCH 112/263] x86/microcode: Fix Entrysign revision check for Zen1/Naples ... to match AMD's statement here: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-7033.html Fixes: 50cef76d5cb0 ("x86/microcode/AMD: Load only SHA256-checksummed patches") Signed-off-by: Andrew Cooper Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://patch.msgid.link/20251020144124.2930784-1-andrew.cooper3@citrix.com --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index cdce885e2fd5..28ed8c089024 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -194,7 +194,7 @@ static bool need_sha_check(u32 cur_rev) } switch (cur_rev >> 8) { - case 0x80012: return cur_rev <= 0x800126f; break; + case 0x80012: return cur_rev <= 0x8001277; break; case 0x80082: return cur_rev <= 0x800820f; break; case 0x83010: return cur_rev <= 0x830107c; break; case 0x86001: return cur_rev <= 0x860010e; break; From 204ced4108f5d38f6804968fd9543cc69c3f8da6 Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Fri, 3 Oct 2025 12:19:36 -0500 Subject: [PATCH 113/263] x86/bugs: Qualify RETBLEED_INTEL_MSG When retbleed mitigation is disabled, the kernel already prints an info message that the system is vulnerable. Recent code restructuring also inadvertently led to RETBLEED_INTEL_MSG being printed as an error, which is unnecessary as retbleed mitigation was already explicitly disabled (by config option, cmdline, etc.). Qualify this print statement so the warning is not printed unless an actual retbleed mitigation was selected and is being disabled due to incompatibility with spectre_v2. Fixes: e3b78a7ad5ea ("x86/bugs: Restructure retbleed mitigation") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220624 Signed-off-by: David Kaplan Signed-off-by: Borislav Petkov (AMD) Link: https://patch.msgid.link/20251003171936.155391-1-david.kaplan@amd.com --- arch/x86/kernel/cpu/bugs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6a526ae1fe99..e08de5b0d20b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1463,7 +1463,9 @@ static void __init retbleed_update_mitigation(void) break; default: if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) { - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_NONE) + pr_err(RETBLEED_INTEL_MSG); + retbleed_mitigation = RETBLEED_MITIGATION_NONE; } } From 6ed8bfd24ce1cb31742b09a3eb557cd008533eec Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Tue, 21 Oct 2025 09:03:53 +0800 Subject: [PATCH 114/263] slab: Avoid race on slab->obj_exts in alloc_slab_obj_exts If two competing threads enter alloc_slab_obj_exts() and one of them fails to allocate the object extension vector, it might override the valid slab->obj_exts allocated by the other thread with OBJEXTS_ALLOC_FAIL. This will cause the thread that lost this race and expects a valid pointer to dereference a NULL pointer later on. Update slab->obj_exts atomically using cmpxchg() to avoid slab->obj_exts overrides by racing threads. Thanks for Vlastimil and Suren's help with debugging. Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally") Cc: Suggested-by: Suren Baghdasaryan Signed-off-by: Hao Ge Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Link: https://patch.msgid.link/20251021010353.1187193-1-hao.ge@linux.dev Signed-off-by: Vlastimil Babka --- mm/slub.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a8fcc7e6f25a..23d8f54e9486 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2054,7 +2054,7 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) static inline void mark_failed_objexts_alloc(struct slab *slab) { - slab->obj_exts = OBJEXTS_ALLOC_FAIL; + cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL); } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, @@ -2136,6 +2136,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, #ifdef CONFIG_MEMCG new_exts |= MEMCG_DATA_OBJEXTS; #endif +retry: old_exts = READ_ONCE(slab->obj_exts); handle_failed_objexts_alloc(old_exts, vec, objects); if (new_slab) { @@ -2145,8 +2146,7 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, * be simply assigned. */ slab->obj_exts = new_exts; - } else if ((old_exts & ~OBJEXTS_FLAGS_MASK) || - cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + } else if (old_exts & ~OBJEXTS_FLAGS_MASK) { /* * If the slab is already in use, somebody can allocate and * assign slabobj_exts in parallel. In this case the existing @@ -2158,6 +2158,9 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, else kfree(vec); return 0; + } else if (cmpxchg(&slab->obj_exts, old_exts, new_exts) != old_exts) { + /* Retry if a racing thread changed slab->obj_exts from under us. */ + goto retry; } if (allow_spin) From 89939cf252d80237ed380c1d20575ecfe56ff894 Mon Sep 17 00:00:00 2001 From: Meenakshikumar Somasundaram Date: Mon, 29 Sep 2025 14:28:34 -0400 Subject: [PATCH 115/263] drm/amd/display: Fix NULL pointer dereference [Why] On a mst branch with multi display setup, dc context is obselete after updating the first stream. Referencing the same dc context for the next stream update to fetch dc pointer leads to NULL pointer dereference. [How] Get the dc pointer from the link rather than context. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Charlene Liu Signed-off-by: Meenakshikumar Somasundaram Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit dc69b48988b171d6ccb3a083607e4dff015e2c0d) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c index 9e33bf937a69..2676ae9f6fe8 100644 --- a/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c +++ b/drivers/gpu/drm/amd/display/dc/link/accessories/link_dp_cts.c @@ -78,6 +78,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, struct audio_output audio_output[MAX_PIPES]; struct dc_stream_state *streams_on_link[MAX_PIPES]; int num_streams_on_link = 0; + struct dc *dc = (struct dc *)link->dc; needs_divider_update = (link->dc->link_srv->dp_get_encoding_format(link_setting) != link->dc->link_srv->dp_get_encoding_format((const struct dc_link_settings *) &link->cur_link_settings)); @@ -150,7 +151,7 @@ static void dp_retrain_link_dp_test(struct dc_link *link, if (streams_on_link[i] && streams_on_link[i]->link && streams_on_link[i]->link == link) { stream_update.stream = streams_on_link[i]; stream_update.dpms_off = &dpms_off; - dc_update_planes_and_stream(state->clk_mgr->ctx->dc, NULL, 0, streams_on_link[i], &stream_update); + dc_update_planes_and_stream(dc, NULL, 0, streams_on_link[i], &stream_update); } } } From bec947cbe9a65783adb475a5fb47980d7b4f4796 Mon Sep 17 00:00:00 2001 From: Charlene Liu Date: Mon, 29 Sep 2025 20:29:30 -0400 Subject: [PATCH 116/263] drm/amd/display: increase max link count and fix link->enc NULL pointer access [why] 1.) dc->links[MAX_LINKS] array size smaller than actual requested. max_connector + max_dpia + 4 virtual = 14. increase from 12 to 14. 2.) hw_init() access null LINK_ENC for dpia non display_endpoint. Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Meenakshikumar Somasundaram Reviewed-by: Chris Park Signed-off-by: Charlene Liu Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit d7f5a61e1b04ed87b008c8d327649d184dc5bb45) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c | 3 +++ drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 7c276c319086..ce3d0b45fb4c 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -200,6 +200,9 @@ void dcn401_init_hw(struct dc *dc) */ struct dc_link *link = dc->links[i]; + if (link->ep_type != DISPLAY_ENDPOINT_PHY) + continue; + link->link_enc->funcs->hw_init(link->link_enc); /* Check for enabled DIG to identify enabled display */ diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h index 41c76ba9ba56..62a39204fe0b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h @@ -44,7 +44,13 @@ */ #define MAX_PIPES 6 #define MAX_PHANTOM_PIPES (MAX_PIPES / 2) -#define MAX_LINKS (MAX_PIPES * 2 +2) + +#define MAX_DPIA 6 +#define MAX_CONNECTOR 6 +#define MAX_VIRTUAL_LINKS 4 + +#define MAX_LINKS (MAX_DPIA + MAX_CONNECTOR + MAX_VIRTUAL_LINKS) + #define MAX_DIG_LINK_ENCODERS 7 #define MAX_DWB_PIPES 1 #define MAX_HPO_DP2_ENCODERS 4 From 72a1eb3cf573ab957ae412f0efb0cf6ff0876234 Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Thu, 25 Sep 2025 10:23:59 -0400 Subject: [PATCH 117/263] drm/amd/display: use GFP_NOWAIT for allocation in interrupt handler schedule_dc_vmin_vmax() is called by dm_crtc_high_irq(). Hence, we cannot have the former sleep. Use GFP_NOWAIT for allocation in this function. Fixes: c210b757b400 ("drm/amd/display: fix dmub access race condition") Cc: Mario Limonciello Cc: Alex Deucher Reviewed-by: Sun peng (Leo) Li Signed-off-by: Aurabindo Pillai Signed-off-by: Alex Deucher (cherry picked from commit c04812cbe2f247a1c1e53a9b6c5e659963fe4065) Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6597475e245d..bfa3199591b6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -551,13 +551,13 @@ static void schedule_dc_vmin_vmax(struct amdgpu_device *adev, struct dc_stream_state *stream, struct dc_crtc_timing_adjust *adjust) { - struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_KERNEL); + struct vupdate_offload_work *offload_work = kzalloc(sizeof(*offload_work), GFP_NOWAIT); if (!offload_work) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate vupdate_offload_work\n"); return; } - struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_KERNEL); + struct dc_crtc_timing_adjust *adjust_copy = kzalloc(sizeof(*adjust_copy), GFP_NOWAIT); if (!adjust_copy) { drm_dbg_driver(adev_to_drm(adev), "Failed to allocate adjust_copy\n"); kfree(offload_work); From 143937ca51cc6ae2fccc61a1cb916abb24cd34f5 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 15 Oct 2025 10:37:12 +0800 Subject: [PATCH 118/263] arm64, mm: avoid always making PTE dirty in pte_mkwrite() Current pte_mkwrite_novma() makes PTE dirty unconditionally. This may mark some pages that are never written dirty wrongly. For example, do_swap_page() may map the exclusive pages with writable and clean PTEs if the VMA is writable and the page fault is for read access. However, current pte_mkwrite_novma() implementation always dirties the PTE. This may cause unnecessary disk writing if the pages are never written before being reclaimed. So, change pte_mkwrite_novma() to clear the PTE_RDONLY bit only if the PTE_DIRTY bit is set to make it possible to make the PTE writable and clean. The current behavior was introduced in commit 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()"). Before that, pte_mkwrite() only sets the PTE_WRITE bit, while set_pte_at() only clears the PTE_RDONLY bit if both the PTE_WRITE and the PTE_DIRTY bits are set. To test the performance impact of the patch, on an arm64 server machine, run 16 redis-server processes on socket 1 and 16 memtier_benchmark processes on socket 0 with mostly get transactions (that is, redis-server will mostly read memory only). The memory footprint of redis-server is larger than the available memory, so swap out/in will be triggered. Test results show that the patch can avoid most swapping out because the pages are mostly clean. And the benchmark throughput improves ~23.9% in the test. Fixes: 73e86cb03cf2 ("arm64: Move PTE_RDONLY bit handling out of set_pte_at()") Signed-off-by: Huang Ying Cc: Will Deacon Cc: Anshuman Khandual Cc: Ryan Roberts Cc: Gavin Shan Cc: Ard Biesheuvel Cc: Matthew Wilcox (Oracle) Cc: Yicong Yang Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index aa89c2e67ebc..0944e296dd4a 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -293,7 +293,8 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + if (pte_sw_dirty(pte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); return pte; } From 4f76435fd517981f01608678c06ad9718a86ee98 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 29 Sep 2025 12:53:40 -0400 Subject: [PATCH 119/263] NFSD: Define actions for the new time_deleg FATTR4 attributes NFSv4 clients won't send legitimate GETATTR requests for these new attributes because they are intended to be used only with CB_GETATTR and SETATTR. But NFSD has to do something besides crashing if it ever sees a GETATTR request that queries these attributes. RFC 8881 Section 18.7.3 states: > The server MUST return a value for each attribute that the client > requests if the attribute is supported by the server for the > target file system. If the server does not support a particular > attribute on the target file system, then it MUST NOT return the > attribute value and MUST NOT set the attribute bit in the result > bitmap. The server MUST return an error if it supports an > attribute on the target but cannot obtain its value. In that case, > no attribute values will be returned. Further, RFC 9754 Section 5 states: > These new attributes are invalid to be used with GETATTR, VERIFY, > and NVERIFY, and they can only be used with CB_GETATTR and SETATTR > by a client holding an appropriate delegation. Thus there does not appear to be a specific server response mandated by specification. Taking the guidance that querying these attributes via GETATTR is "invalid", NFSD will return nfserr_inval, failing the request entirely. Reported-by: Robert Morris Closes: https://lore.kernel.org/linux-nfs/7819419cf0cb50d8130dc6b747765d2b8febc88a.camel@kernel.org/T/#t Fixes: 51c0d4f7e317 ("nfsd: add support for FATTR4_OPEN_ARGUMENTS") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c0a3c6a7c8bb..8f5ee3014abc 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2939,6 +2939,12 @@ struct nfsd4_fattr_args { typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args); +static __be32 nfsd4_encode_fattr4__inval(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfserr_inval; +} + static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { @@ -3560,6 +3566,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop, [FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support, + [FATTR4_TIME_DELEG_ACCESS] = nfsd4_encode_fattr4__inval, + [FATTR4_TIME_DELEG_MODIFY] = nfsd4_encode_fattr4__inval, [FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments, }; From abb1f08a2121dd270193746e43b2a9373db9ad84 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 30 Sep 2025 10:05:20 -0400 Subject: [PATCH 120/263] NFSD: Fix crash in nfsd4_read_release() When tracing is enabled, the trace_nfsd_read_done trace point crashes during the pynfs read.testNoFh test. Fixes: 15a8b55dbb1b ("nfsd: call op_release, even when op_func returns an error") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e466cf52d7d7..f9aeefc0da73 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -988,10 +988,11 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, static void nfsd4_read_release(union nfsd4_op_u *u) { - if (u->read.rd_nf) + if (u->read.rd_nf) { + trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, + u->read.rd_offset, u->read.rd_length); nfsd_file_put(u->read.rd_nf); - trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp, - u->read.rd_offset, u->read.rd_length); + } } static __be32 From 29cdfb4950702bb849f70f7e3b58b4eeb5c1441c Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 30 Sep 2025 11:31:34 -0700 Subject: [PATCH 121/263] nfsd: Avoid strlen conflict in nfsd4_encode_components_esc() There is an error building nfs4xdr.c with CONFIG_SUNRPC_DEBUG_TRACE=y and CONFIG_FORTIFY_SOURCE=n due to the local variable strlen conflicting with the function strlen(): In file included from include/linux/cpumask.h:11, from arch/x86/include/asm/paravirt.h:21, from arch/x86/include/asm/irqflags.h:102, from include/linux/irqflags.h:18, from include/linux/spinlock.h:59, from include/linux/mmzone.h:8, from include/linux/gfp.h:7, from include/linux/slab.h:16, from fs/nfsd/nfs4xdr.c:37: fs/nfsd/nfs4xdr.c: In function 'nfsd4_encode_components_esc': include/linux/kernel.h:321:46: error: called object 'strlen' is not a function or function pointer 321 | __trace_puts(_THIS_IP_, str, strlen(str)); \ | ^~~~~~ include/linux/kernel.h:265:17: note: in expansion of macro 'trace_puts' 265 | trace_puts(fmt); \ | ^~~~~~~~~~ include/linux/sunrpc/debug.h:34:41: note: in expansion of macro 'trace_printk' 34 | # define __sunrpc_printk(fmt, ...) trace_printk(fmt, ##__VA_ARGS__) | ^~~~~~~~~~~~ include/linux/sunrpc/debug.h:42:17: note: in expansion of macro '__sunrpc_printk' 42 | __sunrpc_printk(fmt, ##__VA_ARGS__); \ | ^~~~~~~~~~~~~~~ include/linux/sunrpc/debug.h:25:9: note: in expansion of macro 'dfprintk' 25 | dfprintk(FACILITY, fmt, ##__VA_ARGS__) | ^~~~~~~~ fs/nfsd/nfs4xdr.c:2646:9: note: in expansion of macro 'dprintk' 2646 | dprintk("nfsd4_encode_components(%s)\n", components); | ^~~~~~~ fs/nfsd/nfs4xdr.c:2643:13: note: declared here 2643 | int strlen, count=0; | ^~~~~~ This dprintk() instance is not particularly useful, so just remove it altogether to get rid of the immediate strlen() conflict. At the same time, eliminate the local strlen variable to avoid potential conflicts with strlen() in the future. Fixes: ec7d8e68ef0e ("sunrpc: add a Kconfig option to redirect dfprintk() output to trace buffer") Signed-off-by: Nathan Chancellor Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8f5ee3014abc..b689b792c21f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2628,10 +2628,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, __be32 *p; __be32 pathlen; int pathlen_offset; - int strlen, count=0; char *str, *end, *next; - - dprintk("nfsd4_encode_components(%s)\n", components); + int count = 0; pathlen_offset = xdr->buf->len; p = xdr_reserve_space(xdr, 4); @@ -2658,9 +2656,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep, for (; *end && (*end != sep); end++) /* find sep or end of string */; - strlen = end - str; - if (strlen) { - if (xdr_stream_encode_opaque(xdr, str, strlen) < 0) + if (end > str) { + if (xdr_stream_encode_opaque(xdr, str, end - str) < 0) return nfserr_resource; count++; } else From 3e7f011c255582d7c914133785bbba1990441713 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 2 Oct 2025 10:00:51 -0400 Subject: [PATCH 122/263] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" I've found that pynfs COMP6 now leaves the connection or lease in a strange state, which causes CLOSE9 to hang indefinitely. I've dug into it a little, but I haven't been able to root-cause it yet. However, I bisected to commit 48aab1606fa8 ("NFSD: Remove the cap on number of operations per NFSv4 COMPOUND"). Tianshuo Han also reports a potential vulnerability when decoding an NFSv4 COMPOUND. An attacker can place an arbitrarily large op count in the COMPOUND header, which results in: [ 51.410584] nfsd: vmalloc error: size 1209533382144, exceeds total pages, mode:0xdc0(GFP_KERNEL|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0 when NFSD attempts to allocate the COMPOUND op array. Let's restore the operation-per-COMPOUND limit, but increased to 200 for now. Reported-by: tianshuo han Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Tested-by: Tianshuo Han Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 14 ++++++++++++-- fs/nfsd/nfs4state.c | 1 + fs/nfsd/nfs4xdr.c | 4 +++- fs/nfsd/nfsd.h | 3 +++ fs/nfsd/xdr4.h | 1 + 5 files changed, 20 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f9aeefc0da73..7f7e6bb23a90 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2893,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) rqstp->rq_lease_breaker = (void **)&cstate->clp; - trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt); + trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; + if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) { + /* If there are still more operations to process, + * stop here and report NFS4ERR_RESOURCE. */ + if (cstate->minorversion == 0 && + args->client_opcnt > resp->opcnt) { + op->status = nfserr_resource; + goto encode_op; + } + } + /* * The XDR decode routines may have pre-set op->status; * for example, if there is a miscellaneous XDR error @@ -2973,7 +2983,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) status = op->status; } - trace_nfsd_compound_status(args->opcnt, resp->opcnt, + trace_nfsd_compound_status(args->client_opcnt, resp->opcnt, status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 81fa7cc6c77b..c1b54322c412 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3902,6 +3902,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs ca->headerpadsz = 0; ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc); ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc); + ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND); ca->maxresp_cached = min_t(u32, ca->maxresp_cached, NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ); ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b689b792c21f..6040a6145dad 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0) return false; - if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0) + if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0) return false; + argp->opcnt = min_t(u32, argp->client_opcnt, + NFSD_MAX_OPS_PER_COMPOUND); if (argp->opcnt > ARRAY_SIZE(argp->iops)) { argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops)); diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index ea87b42894dd..f19320018639 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -57,6 +57,9 @@ struct readdir_cd { __be32 err; /* 0, nfserr, or nfserr_eof */ }; +/* Maximum number of operations per session compound */ +#define NFSD_MAX_OPS_PER_COMPOUND 200 + struct nfsd_genl_rqstp { struct sockaddr rq_daddr; struct sockaddr rq_saddr; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index d4b48602b2b0..ee0570cbdd9e 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -903,6 +903,7 @@ struct nfsd4_compoundargs { char * tag; u32 taglen; u32 minorversion; + u32 client_opcnt; u32 opcnt; bool splice_ok; struct nfsd4_op *ops; From 7959ffbec062c35bda02aa635d21ac45dbfacd80 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Fri, 19 Sep 2025 17:28:53 +0300 Subject: [PATCH 123/263] nvmem: rcar-efuse: add missing MODULE_DEVICE_TABLE The nvmem-rcar-efuse driver can be compiled as a module. Add missing MODULE_DEVICE_TABLE so it can be matched by modalias and automatically loaded by udev. Cc: stable@vger.kernel.org Fixes: 1530b923a514 ("nvmem: Add R-Car E-FUSE driver") Signed-off-by: Cosmin Tanislav Reviewed-by: Geert Uytterhoeven Link: https://patch.msgid.link/20250919142856.2313927-1-cosmin-gabriel.tanislav.xa@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/rcar-efuse.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvmem/rcar-efuse.c b/drivers/nvmem/rcar-efuse.c index f24bdb9cb5a7..d9a96a1d59c8 100644 --- a/drivers/nvmem/rcar-efuse.c +++ b/drivers/nvmem/rcar-efuse.c @@ -127,6 +127,7 @@ static const struct of_device_id rcar_fuse_match[] = { { .compatible = "renesas,r8a779h0-otp", .data = &rcar_fuse_v4m }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, rcar_fuse_match); static struct platform_driver rcar_fuse_driver = { .probe = rcar_fuse_probe, From 70ad06df73a9796026b197d84ead751e096618c7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 16 Oct 2025 15:50:40 +0200 Subject: [PATCH 124/263] misc: amd-sbi: Clarify that this is a BMC driver Add a sentence to the driver description to clarify that the sbrmi-i2c driver is intended to run on the BMC and not on the managed node. Add platform dependencies accordingly. Signed-off-by: Jean Delvare Link: https://lore.kernel.org/r/5c9f7100-0e59-4237-a252-43c3ee4802a2@amd.com Link: https://patch.msgid.link/20251016155040.0e86c102@endymion Signed-off-by: Greg Kroah-Hartman --- drivers/misc/amd-sbi/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/amd-sbi/Kconfig b/drivers/misc/amd-sbi/Kconfig index 4aae0733d0fc..ab594908cb4a 100644 --- a/drivers/misc/amd-sbi/Kconfig +++ b/drivers/misc/amd-sbi/Kconfig @@ -2,9 +2,11 @@ config AMD_SBRMI_I2C tristate "AMD side band RMI support" depends on I2C + depends on ARM || ARM64 || COMPILE_TEST select REGMAP_I2C help Side band RMI over I2C support for AMD out of band management. + This driver is intended to run on the BMC, not the managed node. This driver can also be built as a module. If so, the module will be called sbrmi-i2c. From 410d6c2ad4d1a88efa0acbb9966693725b564933 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Thu, 16 Oct 2025 15:59:12 +0300 Subject: [PATCH 125/263] mei: me: add wildcat lake P DID Add Wildcat Lake P device id. Cc: stable@vger.kernel.org Co-developed-by: Tomas Winkler Signed-off-by: Tomas Winkler Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20251016125912.2146136-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index bc40b940ae21..a4f75dc36929 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -120,6 +120,8 @@ #define MEI_DEV_ID_PTL_H 0xE370 /* Panther Lake H */ #define MEI_DEV_ID_PTL_P 0xE470 /* Panther Lake P */ +#define MEI_DEV_ID_WCL_P 0x4D70 /* Wildcat Lake P */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index b108a7c22388..b017ff29dbd1 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -127,6 +127,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_H, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_PTL_P, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_WCL_P, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; From fff111bf45cbeeb659324316d68554e35d350092 Mon Sep 17 00:00:00 2001 From: Junhao Xie Date: Fri, 17 Oct 2025 16:39:06 +0800 Subject: [PATCH 126/263] misc: fastrpc: Fix dma_buf object leak in fastrpc_map_lookup In fastrpc_map_lookup, dma_buf_get is called to obtain a reference to the dma_buf for comparison purposes. However, this reference is never released when the function returns, leading to a dma_buf memory leak. Fix this by adding dma_buf_put before returning from the function, ensuring that the temporarily acquired reference is properly released regardless of whether a matching map is found. Fixes: 9031626ade38 ("misc: fastrpc: Fix fastrpc_map_lookup operation") Cc: stable@kernel.org Signed-off-by: Junhao Xie Tested-by: Xilin Wu Rule: add Link: https://lore.kernel.org/stable/48B368FB4C7007A7%2B20251017083906.3259343-1-bigfoot%40radxa.com Link: https://patch.msgid.link/48B368FB4C7007A7+20251017083906.3259343-1-bigfoot@radxa.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 621bce7e101c..ee652ef01534 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -381,6 +381,8 @@ static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, } spin_unlock(&fl->lock); + dma_buf_put(buf); + return ret; } From 98718e80af0bb1cd80f4bfe565dd60c57debad51 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Sat, 20 Sep 2025 16:17:50 -0700 Subject: [PATCH 127/263] mei: late_bind: Fix -Wincompatible-function-pointer-types-strict When building with -Wincompatible-function-pointer-types-strict, a warning designed to catch kernel control flow integrity (kCFI) issues at build time, there is an instance in the new mei late binding code originating from the type parameter of mei_lb_push_payload(): drivers/misc/mei/mei_lb.c:211:18: error: incompatible function pointer types initializing 'int (*)(struct device *, u32, u32, const void *, size_t)' (aka 'int (*)(struct device *, unsigned int, unsigned int, const void *, unsigned long)') with an expression of type 'int (struct device *, enum intel_lb_type, u32, const void *, size_t)' (aka 'int (struct device *, enum intel_lb_type, unsigned int, const void *, unsigned long)') [-Werror,-Wincompatible-function-pointer-types-strict] 211 | .push_payload = mei_lb_push_payload, | ^~~~~~~~~~~~~~~~~~~ While 'unsigned int' and 'enum intel_lb_type' are ABI compatible, hence no regular warning from -Wincompatible-function-pointer-types, the mismatch will trigger a kCFI violation when mei_lb_push_payload() is called indirectly. Update the type parameter of mei_lb_push_payload() to be 'u32' to match the prototype in 'struct intel_lb_component_ops', clearing up the warning and kCFI violation. Fixes: 741eeabb7c78 ("mei: late_bind: add late binding component driver") Signed-off-by: Nathan Chancellor Link: https://patch.msgid.link/20250920-drm-xe-fix-wifpts-v1-1-c89b5357c7ba@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/mei_lb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/misc/mei/mei_lb.c b/drivers/misc/mei/mei_lb.c index 77686b108d3c..78717ee8ac9a 100644 --- a/drivers/misc/mei/mei_lb.c +++ b/drivers/misc/mei/mei_lb.c @@ -134,8 +134,7 @@ static bool mei_lb_check_response(const struct device *dev, ssize_t bytes, return true; } -static int mei_lb_push_payload(struct device *dev, - enum intel_lb_type type, u32 flags, +static int mei_lb_push_payload(struct device *dev, u32 type, u32 flags, const void *payload, size_t payload_size) { struct mei_cl_device *cldev; From 87b318ba81dda2ee7b603f4f6c55e78ec3e95974 Mon Sep 17 00:00:00 2001 From: Deepanshu Kartikey Date: Wed, 24 Sep 2025 15:56:39 +0530 Subject: [PATCH 128/263] comedi: fix divide-by-zero in comedi_buf_munge() The comedi_buf_munge() function performs a modulo operation `async->munge_chan %= async->cmd.chanlist_len` without first checking if chanlist_len is zero. If a user program submits a command with chanlist_len set to zero, this causes a divide-by-zero error when the device processes data in the interrupt handler path. Add a check for zero chanlist_len at the beginning of the function, similar to the existing checks for !map and CMDF_RAWDATA flag. When chanlist_len is zero, update munge_count and return early, indicating the data was handled without munging. This prevents potential kernel panics from malformed user commands. Reported-by: syzbot+f6c3c066162d2c43a66c@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=f6c3c066162d2c43a66c Cc: stable@vger.kernel.org Signed-off-by: Deepanshu Kartikey Reviewed-by: Ian Abbott Link: https://patch.msgid.link/20250924102639.1256191-1-kartikey406@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/comedi_buf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/comedi/comedi_buf.c b/drivers/comedi/comedi_buf.c index 002c0e76baff..c7c262a2d8ca 100644 --- a/drivers/comedi/comedi_buf.c +++ b/drivers/comedi/comedi_buf.c @@ -317,7 +317,7 @@ static unsigned int comedi_buf_munge(struct comedi_subdevice *s, unsigned int count = 0; const unsigned int num_sample_bytes = comedi_bytes_per_sample(s); - if (!s->munge || (async->cmd.flags & CMDF_RAWDATA)) { + if (!s->munge || (async->cmd.flags & CMDF_RAWDATA) || async->cmd.chanlist_len == 0) { async->munge_count += num_bytes; return num_bytes; } From 2463ae285e5c162686fb19e822fb6b535e6e728a Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Sun, 19 Oct 2025 10:36:59 +0300 Subject: [PATCH 129/263] mei: txe: fix initialization order The mei_register() should move before the mei_start() for hook on class device to work. Same change was implemented in mei-me, missed from mei-txe. Fixes: 7704e6be4ed2 ("mei: hook mei_device on class device") Signed-off-by: Alexander Usyskin Link: https://patch.msgid.link/20251019073659.2646791-1-alexander.usyskin@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/pci-txe.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c index c9eb5c5393e4..06b55a891c6b 100644 --- a/drivers/misc/mei/pci-txe.c +++ b/drivers/misc/mei/pci-txe.c @@ -109,19 +109,19 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto end; } + err = mei_register(dev, &pdev->dev); + if (err) + goto release_irq; + if (mei_start(dev)) { dev_err(&pdev->dev, "init hw failure.\n"); err = -ENODEV; - goto release_irq; + goto deregister; } pm_runtime_set_autosuspend_delay(&pdev->dev, MEI_TXI_RPM_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); - err = mei_register(dev, &pdev->dev); - if (err) - goto stop; - pci_set_drvdata(pdev, dev); /* @@ -144,8 +144,8 @@ static int mei_txe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; -stop: - mei_stop(dev); +deregister: + mei_deregister(dev); release_irq: mei_cancel_work(dev); mei_disable_interrupts(dev); From d90eeb8ecd227c204ab6c34a17b372bd950b7aa2 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 15 Oct 2025 14:26:55 +0000 Subject: [PATCH 130/263] binder: remove "invalid inc weak" check There are no scenarios where a weak increment is invalid on binder_node. The only possible case where it could be invalid is if the kernel delivers BR_DECREFS to the process that owns the node, and then increments the weak refcount again, effectively "reviving" a dead node. However, that is not possible: when the BR_DECREFS command is delivered, the kernel removes and frees the binder_node. The fact that you were able to call binder_inc_node_nilocked() implies that the node is not yet destroyed, which implies that BR_DECREFS has not been delivered to userspace, so incrementing the weak refcount is valid. Note that it's currently possible to trigger this condition if the owner calls BINDER_THREAD_EXIT while node->has_weak_ref is true. This causes BC_INCREFS on binder_ref instances to fail when they should not. Cc: stable@vger.kernel.org Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Reported-by: Yu-Ting Tseng Signed-off-by: Alice Ryhl Link: https://patch.msgid.link/20251015-binder-weak-inc-v1-1-7914b092c371@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3a09c54bc37b..a3a1b5c33ba3 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -851,17 +851,8 @@ static int binder_inc_node_nilocked(struct binder_node *node, int strong, } else { if (!internal) node->local_weak_refs++; - if (!node->has_weak_ref && list_empty(&node->work.entry)) { - if (target_list == NULL) { - pr_err("invalid inc weak node for %d\n", - node->debug_id); - return -EINVAL; - } - /* - * See comment above - */ + if (!node->has_weak_ref && target_list && list_empty(&node->work.entry)) binder_enqueue_work_ilocked(&node->work, target_list); - } } return 0; } From 4b1270902609ef0d935ed2faa2ea6d122bd148f5 Mon Sep 17 00:00:00 2001 From: Victoria Votokina Date: Fri, 10 Oct 2025 13:52:40 +0300 Subject: [PATCH 131/263] most: usb: Fix use-after-free in hdm_disconnect hdm_disconnect() calls most_deregister_interface(), which eventually unregisters the MOST interface device with device_unregister(iface->dev). If that drops the last reference, the device core may call release_mdev() immediately while hdm_disconnect() is still executing. The old code also freed several mdev-owned allocations in hdm_disconnect() and then performed additional put_device() calls. Depending on refcount order, this could lead to use-after-free or double-free when release_mdev() ran (or when unregister paths also performed puts). Fix by moving the frees of mdev-owned allocations into release_mdev(), so they happen exactly once when the device is truly released, and by dropping the extra put_device() calls in hdm_disconnect() that are redundant after device_unregister() and most_deregister_interface(). This addresses the KASAN slab-use-after-free reported by syzbot in hdm_disconnect(). See report and stack traces in the bug link below. Reported-by: syzbot+916742d5d24f6c254761@syzkaller.appspotmail.com Cc: stable Closes: https://syzkaller.appspot.com/bug?extid=916742d5d24f6c254761 Fixes: 97a6f772f36b ("drivers: most: add USB adapter driver") Signed-off-by: Victoria Votokina Link: https://patch.msgid.link/20251010105241.4087114-2-Victoria.Votokina@kaspersky.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/most_usb.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index cf5be9c449a5..3d8163bb7b46 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -929,6 +929,10 @@ static void release_mdev(struct device *dev) { struct most_dev *mdev = to_mdev_from_dev(dev); + kfree(mdev->busy_urbs); + kfree(mdev->cap); + kfree(mdev->conf); + kfree(mdev->ep_address); kfree(mdev); } /** @@ -1121,13 +1125,6 @@ static void hdm_disconnect(struct usb_interface *interface) if (mdev->dci) device_unregister(&mdev->dci->dev); most_deregister_interface(&mdev->iface); - - kfree(mdev->busy_urbs); - kfree(mdev->cap); - kfree(mdev->conf); - kfree(mdev->ep_address); - put_device(&mdev->dci->dev); - put_device(&mdev->dev); } static int hdm_suspend(struct usb_interface *interface, pm_message_t message) From a8cc9e5fcb0e2eef21513a4fec888f5712cb8162 Mon Sep 17 00:00:00 2001 From: Victoria Votokina Date: Fri, 10 Oct 2025 13:52:41 +0300 Subject: [PATCH 132/263] most: usb: hdm_probe: Fix calling put_device() before device initialization The early error path in hdm_probe() can jump to err_free_mdev before &mdev->dev has been initialized with device_initialize(). Calling put_device(&mdev->dev) there triggers a device core WARN and ends up invoking kref_put(&kobj->kref, kobject_release) on an uninitialized kobject. In this path the private struct was only kmalloc'ed and the intended release is effectively kfree(mdev) anyway, so free it directly instead of calling put_device() on an uninitialized device. This removes the WARNING and fixes the pre-initialization error path. Fixes: 97a6f772f36b ("drivers: most: add USB adapter driver") Cc: stable Signed-off-by: Victoria Votokina Link: https://patch.msgid.link/20251010105241.4087114-3-Victoria.Votokina@kaspersky.com Signed-off-by: Greg Kroah-Hartman --- drivers/most/most_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index 3d8163bb7b46..10064d7b7249 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -1097,7 +1097,7 @@ hdm_probe(struct usb_interface *interface, const struct usb_device_id *id) err_free_conf: kfree(mdev->conf); err_free_mdev: - put_device(&mdev->dev); + kfree(mdev); return ret; } From 2eead19334516c8e9927c11b448fbe512b1f18a1 Mon Sep 17 00:00:00 2001 From: Kaushlendra Kumar Date: Tue, 23 Sep 2025 23:13:08 +0530 Subject: [PATCH 133/263] arch_topology: Fix incorrect error check in topology_parse_cpu_capacity() Fix incorrect use of PTR_ERR_OR_ZERO() in topology_parse_cpu_capacity() which causes the code to proceed with NULL clock pointers. The current logic uses !PTR_ERR_OR_ZERO(cpu_clk) which evaluates to true for both valid pointers and NULL, leading to potential NULL pointer dereference in clk_get_rate(). Per include/linux/err.h documentation, PTR_ERR_OR_ZERO(ptr) returns: "The error code within @ptr if it is an error pointer; 0 otherwise." This means PTR_ERR_OR_ZERO() returns 0 for both valid pointers AND NULL pointers. Therefore !PTR_ERR_OR_ZERO(cpu_clk) evaluates to true (proceed) when cpu_clk is either valid or NULL, causing clk_get_rate(NULL) to be called when of_clk_get() returns NULL. Replace with !IS_ERR_OR_NULL(cpu_clk) which only proceeds for valid pointers, preventing potential NULL pointer dereference in clk_get_rate(). Cc: stable Signed-off-by: Kaushlendra Kumar Reviewed-by: Sudeep Holla Fixes: b8fe128dad8f ("arch_topology: Adjust initial CPU capacities with current freq") Link: https://patch.msgid.link/20250923174308.1771906-1-kaushlendra.kumar@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/arch_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1037169abb45..e1eff05bea4a 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -292,7 +292,7 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu) * frequency (by keeping the initial capacity_freq_ref value). */ cpu_clk = of_clk_get(cpu_node, 0); - if (!PTR_ERR_OR_ZERO(cpu_clk)) { + if (!IS_ERR_OR_NULL(cpu_clk)) { per_cpu(capacity_freq_ref, cpu) = clk_get_rate(cpu_clk) / HZ_PER_KHZ; clk_put(cpu_clk); From 00aaae60faf554c27c95e93d47f200a93ff266ef Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Tue, 14 Oct 2025 18:53:53 +0300 Subject: [PATCH 134/263] gpio: regmap: add the .fixed_direction_output configuration parameter There are GPIO controllers such as the one present in the LX2160ARDB QIXIS FPGA which have fixed-direction input and output GPIO lines mixed together in a single register. This cannot be modeled using the gpio-regmap as-is since there is no way to present the true direction of a GPIO line. In order to make this use case possible, add a new configuration parameter - fixed_direction_output - into the gpio_regmap_config structure. This will enable user drivers to provide a bitmap that represents the fixed direction of the GPIO lines. Signed-off-by: Ioana Ciornei Acked-by: Bartosz Golaszewski Reviewed-by: Michael Walle Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-regmap.c | 26 ++++++++++++++++++++++++-- include/linux/gpio/regmap.h | 5 +++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-regmap.c b/drivers/gpio/gpio-regmap.c index ab9e4077fa60..f4267af00027 100644 --- a/drivers/gpio/gpio-regmap.c +++ b/drivers/gpio/gpio-regmap.c @@ -31,6 +31,7 @@ struct gpio_regmap { unsigned int reg_clr_base; unsigned int reg_dir_in_base; unsigned int reg_dir_out_base; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ int regmap_irq_line; @@ -134,6 +135,13 @@ static int gpio_regmap_get_direction(struct gpio_chip *chip, unsigned int base, val, reg, mask; int invert, ret; + if (gpio->fixed_direction_output) { + if (test_bit(offset, gpio->fixed_direction_output)) + return GPIO_LINE_DIRECTION_OUT; + else + return GPIO_LINE_DIRECTION_IN; + } + if (gpio->reg_dat_base && !gpio->reg_set_base) return GPIO_LINE_DIRECTION_IN; if (gpio->reg_set_base && !gpio->reg_dat_base) @@ -284,6 +292,17 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config goto err_free_gpio; } + if (config->fixed_direction_output) { + gpio->fixed_direction_output = bitmap_alloc(chip->ngpio, + GFP_KERNEL); + if (!gpio->fixed_direction_output) { + ret = -ENOMEM; + goto err_free_gpio; + } + bitmap_copy(gpio->fixed_direction_output, + config->fixed_direction_output, chip->ngpio); + } + /* if not set, assume there is only one register */ gpio->ngpio_per_reg = config->ngpio_per_reg; if (!gpio->ngpio_per_reg) @@ -300,7 +319,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config ret = gpiochip_add_data(chip, gpio); if (ret < 0) - goto err_free_gpio; + goto err_free_bitmap; #ifdef CONFIG_REGMAP_IRQ if (config->regmap_irq_chip) { @@ -309,7 +328,7 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config config->regmap_irq_line, config->regmap_irq_flags, 0, config->regmap_irq_chip, &gpio->irq_chip_data); if (ret) - goto err_free_gpio; + goto err_free_bitmap; irq_domain = regmap_irq_get_domain(gpio->irq_chip_data); } else @@ -326,6 +345,8 @@ struct gpio_regmap *gpio_regmap_register(const struct gpio_regmap_config *config err_remove_gpiochip: gpiochip_remove(chip); +err_free_bitmap: + bitmap_free(gpio->fixed_direction_output); err_free_gpio: kfree(gpio); return ERR_PTR(ret); @@ -344,6 +365,7 @@ void gpio_regmap_unregister(struct gpio_regmap *gpio) #endif gpiochip_remove(&gpio->gpio_chip); + bitmap_free(gpio->fixed_direction_output); kfree(gpio); } EXPORT_SYMBOL_GPL(gpio_regmap_unregister); diff --git a/include/linux/gpio/regmap.h b/include/linux/gpio/regmap.h index 622a2939ebe0..87983a5f3681 100644 --- a/include/linux/gpio/regmap.h +++ b/include/linux/gpio/regmap.h @@ -38,6 +38,10 @@ struct regmap; * offset to a register/bitmask pair. If not * given the default gpio_regmap_simple_xlate() * is used. + * @fixed_direction_output: + * (Optional) Bitmap representing the fixed direction of + * the GPIO lines. Useful when there are GPIO lines with a + * fixed direction mixed together in the same register. * @drvdata: (Optional) Pointer to driver specific data which is * not used by gpio-remap but is provided "as is" to the * driver callback(s). @@ -85,6 +89,7 @@ struct gpio_regmap_config { int reg_stride; int ngpio_per_reg; struct irq_domain *irq_domain; + unsigned long *fixed_direction_output; #ifdef CONFIG_REGMAP_IRQ struct regmap_irq_chip *regmap_irq_chip; From 2ba5772e530f73eb847fb96ce6c4017894869552 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Mon, 20 Oct 2025 17:51:46 +0900 Subject: [PATCH 135/263] gpio: idio-16: Define fixed direction of the GPIO lines The direction of the IDIO-16 GPIO lines is fixed with the first 16 lines as output and the remaining 16 lines as input. Set the gpio_config fixed_direction_output member to represent the fixed direction of the GPIO lines. Fixes: db02247827ef ("gpio: idio-16: Migrate to the regmap API") Reported-by: Mark Cave-Ayland Closes: https://lore.kernel.org/r/9b0375fd-235f-4ee1-a7fa-daca296ef6bf@nutanix.com Suggested-by: Michael Walle Cc: stable@vger.kernel.org # ae495810cffe: gpio: regmap: add the .fixed_direction_output configuration parameter Cc: stable@vger.kernel.org Reviewed-by: Andy Shevchenko Signed-off-by: William Breathitt Gray Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20251020-fix-gpio-idio-16-regmap-v2-3-ebeb50e93c33@kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-idio-16.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c index 0103be977c66..4fbae6f6a497 100644 --- a/drivers/gpio/gpio-idio-16.c +++ b/drivers/gpio/gpio-idio-16.c @@ -6,6 +6,7 @@ #define DEFAULT_SYMBOL_NAMESPACE "GPIO_IDIO_16" +#include #include #include #include @@ -107,6 +108,7 @@ int devm_idio_16_regmap_register(struct device *const dev, struct idio_16_data *data; struct regmap_irq_chip *chip; struct regmap_irq_chip_data *chip_data; + DECLARE_BITMAP(fixed_direction_output, IDIO_16_NGPIO); if (!config->parent) return -EINVAL; @@ -164,6 +166,9 @@ int devm_idio_16_regmap_register(struct device *const dev, gpio_config.irq_domain = regmap_irq_get_domain(chip_data); gpio_config.reg_mask_xlate = idio_16_reg_mask_xlate; + bitmap_from_u64(fixed_direction_output, GENMASK_U64(15, 0)); + gpio_config.fixed_direction_output = fixed_direction_output; + return PTR_ERR_OR_ZERO(devm_gpio_regmap_register(dev, &gpio_config)); } EXPORT_SYMBOL_GPL(devm_idio_16_regmap_register); From 37b9dd0d114a0e38c502695e30f55a74fb0c37d0 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 22 Oct 2025 00:25:45 +0200 Subject: [PATCH 136/263] usb: raw-gadget: do not limit transfer length Drop the check on the maximum transfer length in Raw Gadget for both control and non-control transfers. Limiting the transfer length causes a problem with emulating USB devices whose full configuration descriptor exceeds PAGE_SIZE in length. Overall, there does not appear to be any reason to enforce any kind of transfer length limit on the Raw Gadget side for either control or non-control transfers, so let's just drop the related check. Cc: stable Fixes: f2c2e717642c ("usb: gadget: add raw-gadget interface") Signed-off-by: Andrey Konovalov Link: https://patch.msgid.link/a6024e8eab679043e9b8a5defdb41c4bda62f02b.1761085528.git.andreyknvl@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/raw_gadget.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 20165e1582d9..b71680c58de6 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -667,8 +667,6 @@ static void *raw_alloc_io_data(struct usb_raw_ep_io *io, void __user *ptr, return ERR_PTR(-EINVAL); if (!usb_raw_io_flags_valid(io->flags)) return ERR_PTR(-EINVAL); - if (io->length > PAGE_SIZE) - return ERR_PTR(-EINVAL); if (get_from_user) data = memdup_user(ptr + sizeof(*io), io->length); else { From dfc2cf4dcaa03601cd4ca0f7def88b2630fca6ab Mon Sep 17 00:00:00 2001 From: Tim Guttzeit Date: Mon, 20 Oct 2025 15:39:04 +0200 Subject: [PATCH 137/263] usb/core/quirks: Add Huawei ME906S to wakeup quirk The list of Huawei LTE modules needing the quirk fixing spurious wakeups was missing the IDs of the Huawei ME906S module, therefore suspend did not work. Cc: stable Signed-off-by: Tim Guttzeit Signed-off-by: Werner Sembach Link: https://patch.msgid.link/20251020134304.35079-1-wse@tuxedocomputers.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index f5bc53875330..47f589c4104a 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -467,6 +467,8 @@ static const struct usb_device_id usb_quirk_list[] = { /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, + { USB_DEVICE(0x12d1, 0x15c1), .driver_info = + USB_QUIRK_DISCONNECT_SUSPEND }, { USB_DEVICE(0x12d1, 0x15c3), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, From 2d8713f807a49b8a67c221670e50ae04967e915d Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Mon, 13 Oct 2025 11:43:40 +0200 Subject: [PATCH 138/263] tcpm: switch check for role_sw device with fw_node When there is no port entry in the tcpci entry itself, the driver will trigger an error message "OF: graph: no port node found in /...../typec" . It is documented that the dts node should contain an connector entry with ports and several port pointing to devices with usb-role-switch property set. Only when those connector entry is missing, it should check for port entries in the main node. We switch the search order for looking after ports, which will avoid the failure message while there are explicit connector entries. Fixes: d56de8c9a17d ("usb: typec: tcpm: try to get role switch from tcpc fwnode") Cc: stable Signed-off-by: Michael Grzeschik Reviewed-by: Heikki Krogerus Reviewed-by: Badhri Jagan Sridharan Link: https://patch.msgid.link/20251013-b4-ml-topic-tcpm-v2-1-63c9b2ab8a0b@pengutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b2a568a5bc9b..cc78770509db 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -7876,9 +7876,9 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) port->partner_desc.identity = &port->partner_ident; - port->role_sw = usb_role_switch_get(port->dev); + port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); if (!port->role_sw) - port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); + port->role_sw = usb_role_switch_get(port->dev); if (IS_ERR(port->role_sw)) { err = PTR_ERR(port->role_sw); goto out_destroy_wq; From bd721ec7dedcc24ced51559e42a39140b59dfd08 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:29:56 -0700 Subject: [PATCH 139/263] xfs: don't set bt_nr_sectors to a negative number xfs_daddr_t is a signed type, which means that xfs_buf_map_verify is using a signed comparison. This causes problems if bt_nr_sectors is never overridden (e.g. in the case of an xfbtree for rmap btree repairs) because even daddr 0 can't pass the verifier test in that case. Define an explicit max constant and set the initial bt_nr_sectors to a positive value. Found by xfs/422. Cc: stable@vger.kernel.org # v6.18-rc1 Fixes: 42852fe57c6d2a ("xfs: track the number of blocks in each buftarg") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_buf.c | 2 +- fs/xfs/xfs_buf.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 773d959965dc..47edf3041631 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1751,7 +1751,7 @@ xfs_init_buftarg( const char *descr) { /* The maximum size of the buftarg is only known once the sb is read. */ - btp->bt_nr_sectors = (xfs_daddr_t)-1; + btp->bt_nr_sectors = XFS_BUF_DADDR_MAX; /* Set up device logical sector size mask */ btp->bt_logical_sectorsize = logical_sectorsize; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 8fa7bdf59c91..e25cd2a160f3 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -22,6 +22,7 @@ extern struct kmem_cache *xfs_buf_cache; */ struct xfs_buf; +#define XFS_BUF_DADDR_MAX ((xfs_daddr_t) S64_MAX) #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) #define XBF_READ (1u << 0) /* buffer intended for reading from device */ From 630785bfbe12c3ee3ebccd8b530a98d632b7e39d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:12 -0700 Subject: [PATCH 140/263] xfs: always warn about deprecated mount options The deprecation of the 'attr2' mount option in 6.18 wasn't entirely successful because nobody noticed that the kernel never printed a warning about attr2 being set in fstab if the only xfs filesystem is the root fs; the initramfs mounts the root fs with no mount options; and the init scripts only conveyed the fstab options by remounting the root fs. Fix this by making it complain all the time. Cc: stable@vger.kernel.org # v5.13 Fixes: 92cf7d36384b99 ("xfs: Skip repetitive warnings about mount options") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Carlos Maiolino Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 9d51186b24dd..c53f2edf92e7 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1379,16 +1379,25 @@ suffix_kstrtoull( static inline void xfs_fs_warn_deprecated( struct fs_context *fc, - struct fs_parameter *param, - uint64_t flag, - bool value) + struct fs_parameter *param) { - /* Don't print the warning if reconfiguring and current mount point - * already had the flag set + /* + * Always warn about someone passing in a deprecated mount option. + * Previously we wouldn't print the warning if we were reconfiguring + * and current mount point already had the flag set, but that was not + * the right thing to do. + * + * Many distributions mount the root filesystem with no options in the + * initramfs and rely on mount -a to remount the root fs with the + * options in fstab. However, the old behavior meant that there would + * never be a warning about deprecated mount options for the root fs in + * /etc/fstab. On a single-fs system, that means no warning at all. + * + * Compounding this problem are distribution scripts that copy + * /proc/mounts to fstab, which means that we can't remove mount + * options unless we're 100% sure they have only ever been advertised + * in /proc/mounts in response to explicitly provided mount options. */ - if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && - !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) - return; xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } From 3e7ec343f066cb3b6916239680ab6ad44537b453 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:27 -0700 Subject: [PATCH 141/263] xfs: loudly complain about defunct mount options Apparently we can never deprecate mount options in this project, because it will invariably turn out that some foolish userspace depends on some behavior and break. From Oleksandr Natalenko: In v6.18, the attr2 XFS mount option is removed. This may silently break system boot if the attr2 option is still present in /etc/fstab for rootfs. Consider Arch Linux that is being set up from scratch with / being formatted as XFS. The genfstab command that is used to generate /etc/fstab produces something like this by default: /dev/sda2 on / type xfs (rw,relatime,attr2,discard,inode64,logbufs=8,logbsize=32k,noquota) Once the system is set up and rebooted, there's no deprecation warning seen in the kernel log: # cat /proc/cmdline root=UUID=77b42de2-397e-47ee-a1ef-4dfd430e47e9 rootflags=discard rd.luks.options=discard quiet # dmesg | grep -i xfs [ 2.409818] SGI XFS with ACLs, security attributes, realtime, scrub, repair, quota, no debug enabled [ 2.415341] XFS (sda2): Mounting V5 Filesystem 77b42de2-397e-47ee-a1ef-4dfd430e47e9 [ 2.442546] XFS (sda2): Ending clean mount Although as per the deprecation intention, it should be there. Vlastimil (in Cc) suggests this is because xfs_fs_warn_deprecated() doesn't produce any warning by design if the XFS FS is set to be rootfs and gets remounted read-write during boot. This imposes two problems: 1) a user doesn't see the deprecation warning; and 2) with v6.18 kernel, the read-write remount fails because of unknown attr2 option rendering system unusable: systemd[1]: Switching root. systemd-remount-fs[225]: /usr/bin/mount for / exited with exit status 32. # mount -o rw / mount: /: fsconfig() failed: xfs: Unknown parameter 'attr2'. Thorsten (in Cc) suggested reporting this as a user-visible regression. From my PoV, although the deprecation is in place for 5 years already, it may not be visible enough as the warning is not emitted for rootfs. Considering the amount of systems set up with XFS on /, this may impose a mass problem for users. Vlastimil suggested making attr2 option a complete noop instead of removing it. IOWs, the initrd mounts the root fs with (I assume) no mount options, and mount -a remounts with whatever options are in fstab. However, XFS doesn't complain about deprecated mount options during a remount, so technically speaking we were not warning all users in all combinations that they were heading for a cliff. Gotcha!! Now, how did 'attr2' get slurped up on so many systems? The old code would put that in /proc/mounts if the filesystem happened to be in attr2 mode, even if user hadn't mounted with any such option. IOWs, this is because someone thought it would be a good idea to advertise system state via /proc/mounts. The easy way to fix this is to reintroduce the four mount options but map them to a no-op option that ignores them, and hope that nobody's depending on attr2 to appear in /proc/mounts. (Hint: use the fsgeometry ioctl). But we've learned our lesson, so complain as LOUDLY as possible about the deprecation. Lessons learned: 1. Don't expose system state via /proc/mounts; the only strings that ought to be there are options *explicitly* provided by the user. 2. Never tidy, it's not worth the stress and irritation. Reported-by: Vlastimil Babka Reported-by: Oleksandr Natalenko Cc: stable@vger.kernel.org # v6.18-rc1 Fixes: b9a176e54162f8 ("xfs: remove deprecated mount options") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/xfs_super.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index c53f2edf92e7..1067ebb3b001 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -102,7 +102,7 @@ static const struct constant_table dax_param_enums[] = { * Table driven mount option parser. */ enum { - Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, + Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, @@ -114,7 +114,21 @@ enum { Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, }; +#define fsparam_dead(NAME) \ + __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) + static const struct fs_parameter_spec xfs_fs_parameters[] = { + /* + * These mount options were supposed to be deprecated in September 2025 + * but the deprecation warning was buggy, so not all users were + * notified. The deprecation is now obnoxiously loud and postponed to + * September 2030. + */ + fsparam_dead("attr2"), + fsparam_dead("noattr2"), + fsparam_dead("ikeep"), + fsparam_dead("noikeep"), + fsparam_u32("logbufs", Opt_logbufs), fsparam_string("logbsize", Opt_logbsize), fsparam_string("logdev", Opt_logdev), @@ -1423,6 +1437,9 @@ xfs_fs_parse_param( return opt; switch (opt) { + case Op_deprecated: + xfs_fs_warn_deprecated(fc, param); + return 0; case Opt_logbufs: parsing_mp->m_logbufs = result.uint_32; return 0; @@ -1543,7 +1560,6 @@ xfs_fs_parse_param( xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif - /* Following mount options will be removed in September 2025 */ case Opt_max_open_zones: parsing_mp->m_max_open_zones = result.uint_32; return 0; From f477af0cfa0487eddec66ffe10fd9df628ba6f52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 21 Oct 2025 11:30:43 -0700 Subject: [PATCH 142/263] xfs: fix locking in xchk_nlinks_collect_dir On a filesystem with parent pointers, xchk_nlinks_collect_dir walks both the directory entries (data fork) and the parent pointers (attr fork) to determine the correct link count. Unfortunately I forgot to update the lock mode logic to handle the case of a directory whose attr fork is in btree format and has not yet been loaded *and* whose data fork doesn't need loading. This leads to a bunch of assertions from xfs/286 in xfs_iread_extents because we only took ILOCK_SHARED, not ILOCK_EXCL. You'd need the rare happenstance of a directory with a large number of non-pptr extended attributes set and enough memory pressure to cause the directory to be evicted and partially reloaded from disk. I /think/ this only started in 6.18-rc1 because I've started seeing OOM errors with the maple tree slab using 70% of memory, and this didn't happen in 6.17. Yay dynamic systems! Cc: stable@vger.kernel.org # v6.10 Fixes: 77ede5f44b0d86 ("xfs: walk directory parent pointers to determine backref count") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino --- fs/xfs/scrub/nlinks.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 26721fab5cab..091c79e432e5 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -376,6 +376,36 @@ xchk_nlinks_collect_pptr( return error; } +static uint +xchk_nlinks_ilock_dir( + struct xfs_inode *ip) +{ + uint lock_mode = XFS_ILOCK_SHARED; + + /* + * We're going to scan the directory entries, so we must be ready to + * pull the data fork mappings into memory if they aren't already. + */ + if (xfs_need_iread_extents(&ip->i_df)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * We're going to scan the parent pointers, so we must be ready to + * pull the attr fork mappings into memory if they aren't already. + */ + if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) && + xfs_need_iread_extents(&ip->i_af)) + lock_mode = XFS_ILOCK_EXCL; + + /* + * Take the IOLOCK so that other threads cannot start a directory + * update while we're scanning. + */ + lock_mode |= XFS_IOLOCK_SHARED; + xfs_ilock(ip, lock_mode); + return lock_mode; +} + /* Walk a directory to bump the observed link counts of the children. */ STATIC int xchk_nlinks_collect_dir( @@ -394,8 +424,7 @@ xchk_nlinks_collect_dir( return 0; /* Prevent anyone from changing this directory while we walk it. */ - xfs_ilock(dp, XFS_IOLOCK_SHARED); - lock_mode = xfs_ilock_data_map_shared(dp); + lock_mode = xchk_nlinks_ilock_dir(dp); /* * The dotdot entry of an unlinked directory still points to the last @@ -452,7 +481,6 @@ xchk_nlinks_collect_dir( xchk_iscan_abort(&xnc->collect_iscan); out_unlock: xfs_iunlock(dp, lock_mode); - xfs_iunlock(dp, XFS_IOLOCK_SHARED); return error; } From ef8fef45c74b5a0059488fda2df65fa133f7d7d0 Mon Sep 17 00:00:00 2001 From: Cosmin Tanislav Date: Tue, 23 Sep 2025 18:47:06 +0300 Subject: [PATCH 143/263] tty: serial: sh-sci: fix RSCI FIFO overrun handling The receive error handling code is shared between RSCI and all other SCIF port types, but the RSCI overrun_reg is specified as a memory offset, while for other SCIF types it is an enum value used to index into the sci_port_params->regs array, as mentioned above the sci_serial_in() function. For RSCI, the overrun_reg is CSR (0x48), causing the sci_getreg() call inside the sci_handle_fifo_overrun() function to index outside the bounds of the regs array, which currently has a size of 20, as specified by SCI_NR_REGS. Because of this, we end up accessing memory outside of RSCI's rsci_port_params structure, which, when interpreted as a plat_sci_reg, happens to have a non-zero size, causing the following WARN when sci_serial_in() is called, as the accidental size does not match the supported register sizes. The existence of the overrun_reg needs to be checked because SCIx_SH3_SCIF_REGTYPE has overrun_reg set to SCLSR, but SCLSR is not present in the regs array. Avoid calling sci_getreg() for port types which don't use standard register handling. Use the ops->read_reg() and ops->write_reg() functions to properly read and write registers for RSCI, and change the type of the status variable to accommodate the 32-bit CSR register. sci_getreg() and sci_serial_in() are also called with overrun_reg in the sci_mpxed_interrupt() interrupt handler, but that code path is not used for RSCI, as it does not have a muxed interrupt. ------------[ cut here ]------------ Invalid register access WARNING: CPU: 0 PID: 0 at drivers/tty/serial/sh-sci.c:522 sci_serial_in+0x38/0xac Modules linked in: renesas_usbhs at24 rzt2h_adc industrialio_adc sha256 cfg80211 bluetooth ecdh_generic ecc rfkill fuse drm backlight ipv6 CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.17.0-rc1+ #30 PREEMPT Hardware name: Renesas RZ/T2H EVK Board based on r9a09g077m44 (DT) pstate: 604000c5 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : sci_serial_in+0x38/0xac lr : sci_serial_in+0x38/0xac sp : ffff800080003e80 x29: ffff800080003e80 x28: ffff800082195b80 x27: 000000000000000d x26: ffff8000821956d0 x25: 0000000000000000 x24: ffff800082195b80 x23: ffff000180e0d800 x22: 0000000000000010 x21: 0000000000000000 x20: 0000000000000010 x19: ffff000180e72000 x18: 000000000000000a x17: ffff8002bcee7000 x16: ffff800080000000 x15: 0720072007200720 x14: 0720072007200720 x13: 0720072007200720 x12: 0720072007200720 x11: 0000000000000058 x10: 0000000000000018 x9 : ffff8000821a6a48 x8 : 0000000000057fa8 x7 : 0000000000000406 x6 : ffff8000821fea48 x5 : ffff00033ef88408 x4 : ffff8002bcee7000 x3 : ffff800082195b80 x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff800082195b80 Call trace: sci_serial_in+0x38/0xac (P) sci_handle_fifo_overrun.isra.0+0x70/0x134 sci_er_interrupt+0x50/0x39c __handle_irq_event_percpu+0x48/0x140 handle_irq_event+0x44/0xb0 handle_fasteoi_irq+0xf4/0x1a0 handle_irq_desc+0x34/0x58 generic_handle_domain_irq+0x1c/0x28 gic_handle_irq+0x4c/0x140 call_on_irq_stack+0x30/0x48 do_interrupt_handler+0x80/0x84 el1_interrupt+0x34/0x68 el1h_64_irq_handler+0x18/0x24 el1h_64_irq+0x6c/0x70 default_idle_call+0x28/0x58 (P) do_idle+0x1f8/0x250 cpu_startup_entry+0x34/0x3c rest_init+0xd8/0xe0 console_on_rootfs+0x0/0x6c __primary_switched+0x88/0x90 ---[ end trace 0000000000000000 ]--- Cc: stable Fixes: 0666e3fe95ab ("serial: sh-sci: Add support for RZ/T2H SCI") Signed-off-by: Cosmin Tanislav Link: https://patch.msgid.link/20250923154707.1089900-1-cosmin-gabriel.tanislav.xa@renesas.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 538b2f991609..62bb62b82cbe 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1014,16 +1014,18 @@ static int sci_handle_fifo_overrun(struct uart_port *port) struct sci_port *s = to_sci_port(port); const struct plat_sci_reg *reg; int copied = 0; - u16 status; + u32 status; - reg = sci_getreg(port, s->params->overrun_reg); - if (!reg->size) - return 0; + if (s->type != SCI_PORT_RSCI) { + reg = sci_getreg(port, s->params->overrun_reg); + if (!reg->size) + return 0; + } - status = sci_serial_in(port, s->params->overrun_reg); + status = s->ops->read_reg(port, s->params->overrun_reg); if (status & s->params->overrun_mask) { status &= ~s->params->overrun_mask; - sci_serial_out(port, s->params->overrun_reg, status); + s->ops->write_reg(port, s->params->overrun_reg, status); port->icount.overrun++; From e7cbce761fe3fcbcb49bcf30d4f8ca5e1a9ee2a0 Mon Sep 17 00:00:00 2001 From: Florian Eckert Date: Wed, 24 Sep 2025 15:41:15 +0200 Subject: [PATCH 144/263] serial: 8250_exar: add support for Advantech 2 port card with Device ID 0x0018 The Advantech 2-port serial card with PCI vendor=0x13fe and device=0x0018 has a 'XR17V35X' chip installed on the circuit board. Therefore, this driver can be used instead of theu outdated out-of-tree driver from the manufacturer. Signed-off-by: Florian Eckert Cc: stable Link: https://patch.msgid.link/20250924134115.2667650-1-fe@dev.tdt.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 04a0cbab02c2..b9cc0b786ca6 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -40,6 +40,8 @@ #define PCI_DEVICE_ID_ACCESSIO_COM_4SM 0x10db #define PCI_DEVICE_ID_ACCESSIO_COM_8SM 0x10ea +#define PCI_DEVICE_ID_ADVANTECH_XR17V352 0x0018 + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -1622,6 +1624,12 @@ static const struct exar8250_board pbn_fastcom35x_8 = { .exit = pci_xr17v35x_exit, }; +static const struct exar8250_board pbn_adv_XR17V352 = { + .num_ports = 2, + .setup = pci_xr17v35x_setup, + .exit = pci_xr17v35x_exit, +}; + static const struct exar8250_board pbn_exar_XR17V4358 = { .num_ports = 12, .setup = pci_xr17v35x_setup, @@ -1696,6 +1704,9 @@ static const struct pci_device_id exar_pci_tbl[] = { USR_DEVICE(XR17C152, 2980, pbn_exar_XR17C15x), USR_DEVICE(XR17C152, 2981, pbn_exar_XR17C15x), + /* ADVANTECH devices */ + EXAR_DEVICE(ADVANTECH, XR17V352, pbn_adv_XR17V352), + /* Exar Corp. XR17C15[248] Dual/Quad/Octal UART */ EXAR_DEVICE(EXAR, XR17C152, pbn_exar_XR17C15x), EXAR_DEVICE(EXAR, XR17C154, pbn_exar_XR17C15x), From 1c05bf6c0262f946571a37678250193e46b1ff0f Mon Sep 17 00:00:00 2001 From: Hugo Villeneuve Date: Mon, 6 Oct 2025 10:20:02 -0400 Subject: [PATCH 145/263] serial: sc16is7xx: remove useless enable of enhanced features Commit 43c51bb573aa ("sc16is7xx: make sure device is in suspend once probed") permanently enabled access to the enhanced features in sc16is7xx_probe(), and it is never disabled after that. Therefore, remove re-enable of enhanced features in sc16is7xx_set_baud(). This eliminates a potential useless read + write cycle each time the baud rate is reconfigured. Fixes: 43c51bb573aa ("sc16is7xx: make sure device is in suspend once probed") Cc: stable Signed-off-by: Hugo Villeneuve Link: https://patch.msgid.link/20251006142002.177475-1-hugo@hugovil.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 1a2c4c14f6aa..c7435595dce1 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -588,13 +588,6 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) div /= prescaler; } - /* Enable enhanced features */ - sc16is7xx_efr_lock(port); - sc16is7xx_port_update(port, SC16IS7XX_EFR_REG, - SC16IS7XX_EFR_ENABLE_BIT, - SC16IS7XX_EFR_ENABLE_BIT); - sc16is7xx_efr_unlock(port); - /* If bit MCR_CLKSEL is set, the divide by 4 prescaler is activated. */ sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, From ea9f6d316782bf36141df764634a53d085061091 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 8 Oct 2025 12:50:36 +0200 Subject: [PATCH 146/263] dt-bindings: serial: sh-sci: Fix r8a78000 interrupts The SCIF instances on R-Car Gen5 have a single interrupt, just like on other R-Car SoCs. Fixes: 6ac1d60473727931 ("dt-bindings: serial: sh-sci: Document r8a78000 bindings") Cc: stable Signed-off-by: Geert Uytterhoeven Acked-by: Kuninori Morimoto Acked-by: Conor Dooley Link: https://patch.msgid.link/09bc9881b31bdb948ce8b69a2b5acf633f5505a4.1759920441.git.geert+renesas@glider.be Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/renesas,scif.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml index e925cd4c3ac8..72483bc3274d 100644 --- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml +++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml @@ -197,6 +197,7 @@ allOf: - renesas,rcar-gen2-scif - renesas,rcar-gen3-scif - renesas,rcar-gen4-scif + - renesas,rcar-gen5-scif then: properties: interrupts: From daeb4037adf7d3349b4a1fb792f4bc9824686a4b Mon Sep 17 00:00:00 2001 From: Artem Shimko Date: Sun, 19 Oct 2025 12:51:31 +0300 Subject: [PATCH 147/263] serial: 8250_dw: handle reset control deassert error Check the return value of reset_control_deassert() in the probe function to prevent continuing probe when reset deassertion fails. Previously, reset_control_deassert() was called without checking its return value, which could lead to probe continuing even when the device reset wasn't properly deasserted. The fix checks the return value and returns an error with dev_err_probe() if reset deassertion fails, providing better error handling and diagnostics. Fixes: acbdad8dd1ab ("serial: 8250_dw: simplify optional reset handling") Cc: stable Signed-off-by: Artem Shimko Link: https://patch.msgid.link/20251019095131.252848-1-a.shimko.dev@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_dw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a53ba04d9770..710ae4d40aec 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -635,7 +635,9 @@ static int dw8250_probe(struct platform_device *pdev) if (IS_ERR(data->rst)) return PTR_ERR(data->rst); - reset_control_deassert(data->rst); + err = reset_control_deassert(data->rst); + if (err) + return dev_err_probe(dev, err, "failed to deassert resets\n"); err = devm_add_action_or_reset(dev, dw8250_reset_control_assert, data->rst); if (err) From d518314a1fa4e980a227d1b2bda1badf433cb932 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Tue, 16 Sep 2025 22:37:27 +0100 Subject: [PATCH 148/263] serial: 8250_mtk: Enable baud clock and manage in runtime PM Some MediaTek SoCs got a gated UART baud clock, which currently gets disabled as the clk subsystem believes it would be unused. This results in the uart freezing right after "clk: Disabling unused clocks" on those platforms. Request the baud clock to be prepared and enabled during probe, and to restore run-time power management capabilities to what it was before commit e32a83c70cf9 ("serial: 8250-mtk: modify mtk uart power and clock management") disable and unprepare the baud clock when suspending the UART, prepare and enable it again when resuming it. Fixes: e32a83c70cf9 ("serial: 8250-mtk: modify mtk uart power and clock management") Fixes: b6c7ff2693ddc ("serial: 8250_mtk: Simplify clock sequencing and runtime PM") Cc: stable Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Daniel Golle Link: https://patch.msgid.link/de5197ccc31e1dab0965cabcc11ca92e67246cf6.1758058441.git.daniel@makrotopia.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_mtk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c index b44de2ed7413..5875a7b9b4b1 100644 --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c @@ -435,6 +435,7 @@ static int __maybe_unused mtk8250_runtime_suspend(struct device *dev) while (serial_in(up, MTK_UART_DEBUG0)); + clk_disable_unprepare(data->uart_clk); clk_disable_unprepare(data->bus_clk); return 0; @@ -445,6 +446,7 @@ static int __maybe_unused mtk8250_runtime_resume(struct device *dev) struct mtk8250_data *data = dev_get_drvdata(dev); clk_prepare_enable(data->bus_clk); + clk_prepare_enable(data->uart_clk); return 0; } @@ -475,13 +477,13 @@ static int mtk8250_probe_of(struct platform_device *pdev, struct uart_port *p, int dmacnt; #endif - data->uart_clk = devm_clk_get(&pdev->dev, "baud"); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, "baud"); if (IS_ERR(data->uart_clk)) { /* * For compatibility with older device trees try unnamed * clk when no baud clk can be found. */ - data->uart_clk = devm_clk_get(&pdev->dev, NULL); + data->uart_clk = devm_clk_get_enabled(&pdev->dev, NULL); if (IS_ERR(data->uart_clk)) { dev_warn(&pdev->dev, "Can't get uart clock\n"); return PTR_ERR(data->uart_clk); From 0e4a169d1a2b630c607416d9e3739d80e176ed67 Mon Sep 17 00:00:00 2001 From: K Prateek Nayak Date: Tue, 21 Oct 2025 05:35:22 +0000 Subject: [PATCH 149/263] sched/fair: Start a cfs_rq on throttled hierarchy with PELT clock throttled Matteo reported hitting the assert_list_leaf_cfs_rq() warning from enqueue_task_fair() post commit fe8d238e646e ("sched/fair: Propagate load for throttled cfs_rq") which transitioned to using cfs_rq_pelt_clock_throttled() check for leaf cfs_rq insertions in propagate_entity_cfs_rq(). The "cfs_rq->pelt_clock_throttled" flag is used to indicate if the hierarchy has its PELT frozen. If a cfs_rq's PELT is marked frozen, all its descendants should have their PELT frozen too or weird things can happen as a result of children accumulating PELT signals when the parents have their PELT clock stopped. Another side effect of this is the loss of integrity of the leaf cfs_rq list. As debugged by Aaron, consider the following hierarchy: root(#) / \ A(#) B(*) | C <--- new cgroup | D <--- new cgroup # - Already on leaf cfs_rq list * - Throttled with PELT frozen The newly created cgroups don't have their "pelt_clock_throttled" signal synced with cgroup B. Next, the following series of events occur: 1. online_fair_sched_group() for cgroup D will call propagate_entity_cfs_rq(). (Same can happen if a throttled task is moved to cgroup C and enqueue_task_fair() returns early.) propagate_entity_cfs_rq() adds the cfs_rq of cgroup C to "rq->tmp_alone_branch" since its PELT clock is not marked throttled and cfs_rq of cgroup B is not on the list. cfs_rq of cgroup B is skipped since its PELT is throttled. root cfs_rq already exists on cfs_rq leading to list_add_leaf_cfs_rq() returning early. The cfs_rq of cgroup C is left dangling on the "rq->tmp_alone_branch". 2. A new task wakes up on cgroup A. Since the whole hierarchy is already on the leaf cfs_rq list, list_add_leaf_cfs_rq() keeps returning early without any modifications to "rq->tmp_alone_branch". The final assert_list_leaf_cfs_rq() in enqueue_task_fair() sees the dangling reference to cgroup C's cfs_rq in "rq->tmp_alone_branch". !!! Splat !!! Syncing the "pelt_clock_throttled" indicator with parent cfs_rq is not enough since the new cfs_rq is not yet enqueued on the hierarchy. A dequeue on other subtree on the throttled hierarchy can freeze the PELT clock for the parent hierarchy without setting the indicators for this newly added cfs_rq which was never enqueued. Since there are no tasks on the new hierarchy, start a cfs_rq on a throttled hierarchy with its PELT clock throttled. The first enqueue, or the distribution (whichever happens first) will unfreeze the PELT clock and queue the cfs_rq on the leaf cfs_rq list. While at it, add an assert_list_leaf_cfs_rq() in propagate_entity_cfs_rq() to catch such cases in the future. Closes: https://lore.kernel.org/lkml/58a587d694f33c2ea487c700b0d046fa@codethink.co.uk/ Fixes: e1fad12dcb66 ("sched/fair: Switch to task based throttle model") Reported-by: Matteo Martelli Suggested-by: Aaron Lu Signed-off-by: K Prateek Nayak Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Aaron Lu Tested-by: Aaron Lu Tested-by: Matteo Martelli Link: https://patch.msgid.link/20251021053522.37583-1-kprateek.nayak@amd.com --- kernel/sched/fair.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index cee1793e8277..25970dbbb279 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6437,6 +6437,16 @@ static void sync_throttle(struct task_group *tg, int cpu) cfs_rq->throttle_count = pcfs_rq->throttle_count; cfs_rq->throttled_clock_pelt = rq_clock_pelt(cpu_rq(cpu)); + + /* + * It is not enough to sync the "pelt_clock_throttled" indicator + * with the parent cfs_rq when the hierarchy is not queued. + * Always join a throttled hierarchy with PELT clock throttled + * and leaf it to the first enqueue, or distribution to + * unthrottle the PELT clock. + */ + if (cfs_rq->throttle_count) + cfs_rq->pelt_clock_throttled = 1; } /* conditionally throttle active cfs_rq's from put_prev_entity() */ @@ -13187,6 +13197,8 @@ static void propagate_entity_cfs_rq(struct sched_entity *se) if (!cfs_rq_pelt_clock_throttled(cfs_rq)) list_add_leaf_cfs_rq(cfs_rq); } + + assert_list_leaf_cfs_rq(rq_of(cfs_rq)); } #else /* !CONFIG_FAIR_GROUP_SCHED: */ static void propagate_entity_cfs_rq(struct sched_entity *se) { } From dbdf2a7feb422f9bacfd12774e624cf26f503eb0 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Mon, 20 Oct 2025 04:07:14 +0200 Subject: [PATCH 150/263] objtool/rust: add one more `noreturn` Rust function Between Rust 1.79 and 1.86, under `CONFIG_RUST_KERNEL_DOCTESTS=y`, `objtool` may report: rust/doctests_kernel_generated.o: warning: objtool: rust_doctest_kernel_alloc_kbox_rs_13() falls through to next function rust_doctest_kernel_alloc_kvec_rs_0() (as well as in rust_doctest_kernel_alloc_kvec_rs_0) due to calls to the `noreturn` symbol: core::option::expect_failed from code added in commits 779db37373a3 ("rust: alloc: kvec: implement AsPageIter for VVec") and 671618432f46 ("rust: alloc: kbox: implement AsPageIter for VBox"). Thus add the mangled one to the list so that `objtool` knows it is actually `noreturn`. This can be reproduced as well in other versions by tweaking the code, such as the latest stable Rust (1.90.0). Stable does not have code that triggers this, but it could have it in the future. Downstream forks could too. Thus tag it for backport. See commit 56d680dd23c3 ("objtool/rust: list `noreturn` Rust functions") for more details. Signed-off-by: Miguel Ojeda Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Alice Ryhl Cc: stable@vger.kernel.org # Needed in 6.12.y and later. Link: https://patch.msgid.link/20251020020714.2511718-1-ojeda@kernel.org --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a5770570b106..3c7ab910b189 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -217,6 +217,7 @@ static bool is_rust_noreturn(const struct symbol *func) * these come from the Rust standard library). */ return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || + str_ends_with(func->name, "_4core6option13expect_failed") || str_ends_with(func->name, "_4core6option13unwrap_failed") || str_ends_with(func->name, "_4core6result13unwrap_failed") || str_ends_with(func->name, "_4core9panicking5panic") || From 49c98f30f4021b560676a336f8a46a4f642eee2b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 Oct 2025 14:23:58 +0200 Subject: [PATCH 151/263] objtool: Fix failure when being compiled on x32 system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix compilation failure when compiling the kernel with the x32 toolchain. In file included from check.c:16: check.c: In function ¡check_abs_references¢: /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:47:17: error: format ¡%lx¢ expects argument of type ¡long unsigned int¢, but argument 7 has type ¡u64¢ {aka ¡long long unsigned int¢} [-Werror=format=] 47 | "%s%s%s: objtool" extra ": " format "\n", \ | ^~~~~~~~~~~~~~~~~ /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:54:9: note: in expansion of macro ¡___WARN¢ 54 | ___WARN(severity, "", format, ##__VA_ARGS__) | ^~~~~~~ /usr/src/git/linux-2.6/tools/objtool/include/objtool/warn.h:74:27: note: in expansion of macro ¡__WARN¢ 74 | #define WARN(format, ...) __WARN(WARN_STR, format, ##__VA_ARGS__) | ^~~~~~ check.c:4713:33: note: in expansion of macro ¡WARN¢ 4713 | WARN("section %s has absolute relocation at offset 0x%lx", | ^~~~ Fixes: 0d6e4563fc03 ("objtool: Add action to check for absence of absolute relocations") Signed-off-by: Mikulas Patocka Signed-off-by: Peter Zijlstra (Intel) Acked-by: Ard Biesheuvel Link: https://patch.msgid.link/1ac32fff-2e67-5155-f570-69aad5bf5412@redhat.com --- tools/objtool/check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3c7ab910b189..620854fdaaf6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4711,8 +4711,8 @@ static int check_abs_references(struct objtool_file *file) for_each_reloc(sec->rsec, reloc) { if (arch_absolute_reloc(file->elf, reloc)) { - WARN("section %s has absolute relocation at offset 0x%lx", - sec->name, reloc_offset(reloc)); + WARN("section %s has absolute relocation at offset 0x%llx", + sec->name, (unsigned long long)reloc_offset(reloc)); ret++; } } From 1b824134261d2db08fb6583ccbd05cb71861bd53 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Wed, 22 Oct 2025 02:41:59 +0200 Subject: [PATCH 152/263] spi: dt-bindings: spi-rockchip: Add RK3506 compatible The SPI controller found in the RK3506 SoC is still compatible to the original one introduced with the RK3066, so add the RK3506 compatible to the list of its variants. Signed-off-by: Heiko Stuebner Link: https://patch.msgid.link/20251022004200.204276-1-heiko@sntech.de Signed-off-by: Mark Brown --- Documentation/devicetree/bindings/spi/spi-rockchip.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml index 748faf7f7081..ce6762c92fda 100644 --- a/Documentation/devicetree/bindings/spi/spi-rockchip.yaml +++ b/Documentation/devicetree/bindings/spi/spi-rockchip.yaml @@ -34,6 +34,7 @@ properties: - rockchip,rk3328-spi - rockchip,rk3368-spi - rockchip,rk3399-spi + - rockchip,rk3506-spi - rockchip,rk3528-spi - rockchip,rk3562-spi - rockchip,rk3568-spi From 119aaeed0b6729293f41ea33be05ecd27a947d48 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:40:59 +0200 Subject: [PATCH 153/263] of/irq: Add msi-parent check to of_msi_xlate() In some legacy platforms the MSI controller for a PCI host bridge is identified by an msi-parent property whose phandle points at an MSI controller node with no #msi-cells property, that implicitly means #msi-cells == 0. For such platforms, mapping a device ID and retrieving the MSI controller node becomes simply a matter of checking whether in the device hierarchy there is an msi-parent property pointing at an MSI controller node with such characteristics. Add a helper function to of_msi_xlate() to check the msi-parent property in addition to msi-map and retrieve the MSI controller node (with a 1:1 ID deviceID-IN<->deviceID-OUT mapping) to provide support for deviceID mapping and MSI controller node retrieval for such platforms. Fixes: 57d72196dfc8 ("irqchip/gic-v5: Add GICv5 ITS support") Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Sascha Bischoff Cc: Rob Herring Cc: Marc Zyngier Link: https://patch.msgid.link/20251021124103.198419-2-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 65c3c23255b7..321d40ec229b 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -671,6 +671,36 @@ void __init of_irq_init(const struct of_device_id *matches) } } +static int of_check_msi_parent(struct device_node *dev_node, struct device_node **msi_node) +{ + struct of_phandle_args msi_spec; + int ret; + + /* + * An msi-parent phandle with a missing or == 0 #msi-cells + * property identifies a 1:1 ID translation mapping. + * + * Set the msi controller node if the firmware matches this + * condition. + */ + ret = of_parse_phandle_with_optional_args(dev_node, "msi-parent", "#msi-cells", + 0, &msi_spec); + if (ret) + return ret; + + if ((*msi_node && *msi_node != msi_spec.np) || msi_spec.args_count != 0) + ret = -EINVAL; + + if (!ret) { + /* Return with a node reference held */ + *msi_node = msi_spec.np; + return 0; + } + of_node_put(msi_spec.np); + + return ret; +} + /** * of_msi_xlate - map a MSI ID and find relevant MSI controller node * @dev: device for which the mapping is to be done. @@ -678,7 +708,7 @@ void __init of_irq_init(const struct of_device_id *matches) * @id_in: Device ID. * * Walk up the device hierarchy looking for devices with a "msi-map" - * property. If found, apply the mapping to @id_in. + * or "msi-parent" property. If found, apply the mapping to @id_in. * If @msi_np points to a non-NULL device node pointer, only entries targeting * that node will be matched; if it points to a NULL value, it will receive the * device node of the first matching target phandle, with a reference held. @@ -692,12 +722,15 @@ u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) /* * Walk up the device parent links looking for one with a - * "msi-map" property. + * "msi-map" or an "msi-parent" property. */ - for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) + for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) { if (!of_map_id(parent_dev->of_node, id_in, "msi-map", "msi-map-mask", msi_np, &id_out)) break; + if (!of_check_msi_parent(parent_dev->of_node, msi_np)) + break; + } return id_out; } From c71af4d6d56665e04634babfc45dce3d9ab58285 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:41:00 +0200 Subject: [PATCH 154/263] of/irq: Fix OF node refcount in of_msi_get_domain() In of_msi_get_domain() if the iterator loop stops early because an irq_domain match is detected, an of_node_put() on the iterator node is needed to keep the OF node refcount in sync. Add it. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Rob Herring Link: https://patch.msgid.link/20251021124103.198419-3-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 321d40ec229b..ee7d5f0842e8 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -774,8 +774,10 @@ struct irq_domain *of_msi_get_domain(struct device *dev, of_for_each_phandle(&it, err, np, "msi-parent", "#msi-cells", 0) { d = irq_find_matching_host(it.node, token); - if (d) + if (d) { + of_node_put(it.node); return d; + } } return NULL; From 89205c60c0fc96b73567a2e9fe27ee3f59d01193 Mon Sep 17 00:00:00 2001 From: Reinhard Speyerer Date: Wed, 22 Oct 2025 16:17:26 +0200 Subject: [PATCH 155/263] USB: serial: option: add Quectel RG255C Add support for Quectel RG255C devices to complement commit 5c964c8a97c1 ("net: usb: qmi_wwan: add Quectel RG255C"). The composition is DM / NMEA / AT / QMI. T: Bus=01 Lev=02 Prnt=99 Port=01 Cnt=02 Dev#=110 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0316 Rev= 5.15 S: Manufacturer=Quectel S: Product=RG255C-GL S: SerialNumber=xxxxxxxx C:* #Ifs= 4 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=50 Driver=qmi_wwan E: Ad=86(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Reinhard Speyerer Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ed1328648a73..3d6ebe2692a9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -273,6 +273,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 +#define QUECTEL_PRODUCT_RG255C 0x0316 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1271,6 +1272,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG650V, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RG255C, 0xff, 0xff, 0x40) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, From 4c8cf6bd28d6fea23819f082ddc8063fd6fa963a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Oct 2025 10:33:31 +0200 Subject: [PATCH 156/263] block: require LBA dma_alignment when using PI The block layer PI generation / verification code expects the bio_vecs to have at least LBA size (or more correctly integrity internal) granularity. With the direct I/O alignment relaxation in 2022, user space can now feed bios with less alignment than that, leading to scribbling outside the PI buffers. Apparently this wasn't noticed so far because none of the tests generate such buffers, but since 851c4c96db00 ("xfs: implement XFS_IOC_DIOINFO in terms of vfs_getattr"), xfstests generic/013 by default generates such I/O now that the relaxed alignment is advertised by the XFS_IOC_DIOINFO ioctl. Fix this by increasing the required alignment when using PI, although handling arbitrary alignment in the long run would be even nicer. Fixes: bf8d08532bc1 ("iomap: add support for dma aligned direct-io") Fixes: b1a000d3b8ec ("block: relax direct io memory alignment") Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- block/blk-settings.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 54cffaae4df4..d74b13ec8e54 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -184,6 +184,16 @@ static int blk_validate_integrity_limits(struct queue_limits *lim) if (!bi->interval_exp) bi->interval_exp = ilog2(lim->logical_block_size); + /* + * The PI generation / validation helpers do not expect intervals to + * straddle multiple bio_vecs. Enforce alignment so that those are + * never generated, and that each buffer is aligned as expected. + */ + if (bi->csum_type) { + lim->dma_alignment = max(lim->dma_alignment, + (1U << bi->interval_exp) - 1); + } + return 0; } From bf5570590a981d0659d0808d2d4bcda21b27a2a5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 21 Oct 2025 20:38:22 +0100 Subject: [PATCH 157/263] MIPS: Malta: Fix keyboard resource preventing i8042 driver from registering MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPS Malta platform code registers the PCI southbridge legacy port I/O PS/2 keyboard range as a standard resource marked as busy. It prevents the i8042 driver from registering as it fails to claim the resource in a call to i8042_platform_init(). Consequently PS/2 keyboard and mouse devices cannot be used with this platform. Fix the issue by removing the busy marker from the standard reservation, making the driver register successfully: serio: i8042 KBD port at 0x60,0x64 irq 1 serio: i8042 AUX port at 0x60,0x64 irq 12 and the resource show up as expected among the legacy devices: 00000000-00ffffff : MSC PCI I/O 00000000-0000001f : dma1 00000020-00000021 : pic1 00000040-0000005f : timer 00000060-0000006f : keyboard 00000060-0000006f : i8042 00000070-00000077 : rtc0 00000080-0000008f : dma page reg 000000a0-000000a1 : pic2 000000c0-000000df : dma2 [...] If the i8042 driver has not been configured, then the standard resource will remain there preventing any conflicting dynamic assignment of this PCI port I/O address range. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Acked-by: Thomas Bogendoerfer Cc: stable@vger.kernel.org Link: https://patch.msgid.link/alpine.DEB.2.21.2510211919240.8377@angie.orcam.me.uk --- arch/mips/mti-malta/malta-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 3a2836e9d856..2a3fd8bbf6c2 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -47,7 +47,7 @@ static struct resource standard_io_resources[] = { .name = "keyboard", .start = 0x60, .end = 0x6f, - .flags = IORESOURCE_IO | IORESOURCE_BUSY + .flags = IORESOURCE_IO }, { .name = "dma page reg", From 1d5d1663619d5a367be538f6a1be1cf5bd2cf494 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 21 Oct 2025 20:38:29 +0100 Subject: [PATCH 158/263] MIPS: Malta: Fix PCI southbridge legacy resource reservations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Covering the PCI southbridge legacy port I/O range with a northbridge resource reservation prevents MIPS Malta platform code from claiming its standard legacy resources. This is because request_resource() calls cause a clash with the previous reservation and consequently fail. Change to using insert_resource() so as to prevent the clash, switching the legacy reservations from: 00000000-00ffffff : MSC PCI I/O 00000020-00000021 : pic1 00000070-00000077 : rtc0 000000a0-000000a1 : pic2 [...] to: 00000000-00ffffff : MSC PCI I/O 00000000-0000001f : dma1 00000020-00000021 : pic1 00000040-0000005f : timer 00000060-0000006f : keyboard 00000070-00000077 : rtc0 00000080-0000008f : dma page reg 000000a0-000000a1 : pic2 000000c0-000000df : dma2 [...] Fixes: ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") Signed-off-by: Maciej W. Rozycki Signed-off-by: Bjorn Helgaas Reviewed-by: Ilpo Järvinen Acked-by: Thomas Bogendoerfer Cc: stable@vger.kernel.org # v6.18+ Link: https://patch.msgid.link/alpine.DEB.2.21.2510212001250.8377@angie.orcam.me.uk --- arch/mips/mti-malta/malta-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c index 2a3fd8bbf6c2..816570514c37 100644 --- a/arch/mips/mti-malta/malta-setup.c +++ b/arch/mips/mti-malta/malta-setup.c @@ -213,7 +213,7 @@ void __init plat_mem_setup(void) /* Request I/O space for devices used on the Malta board. */ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++) - request_resource(&ioport_resource, standard_io_resources+i); + insert_resource(&ioport_resource, standard_io_resources + i); /* * Enable DMA channel 4 (cascade channel) in the PIIX4 south bridge. From f294a5fd34db564108a16166d891634a3cb25c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 17 Oct 2025 14:09:03 +0300 Subject: [PATCH 159/263] MIPS: Malta: Use pcibios_align_resource() to block io range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Maciej W. Rozycki , the mips_pcibios_init() for malta adjusts root bus IO resource start address to prevent interfering with PIIX4 I/O cycle decoding. Adjusting lower bound leaves PIIX4 IO resources outside of the root bus resource and assign_fixed_resource_on_bus() does not link the resources into the resource tree. Prior to commit ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") the arch specific pcibios_enable_resources() did not check if the resources were assigned which diverges from what PCI core checks, effectively hiding the PIIX4 IO resources were not properly within the resource tree. After starting to use pcibios_enable_resources() from PCI core, enabling PIIX4 fails: ata_piix 0000:00:0a.1: BAR 0 [io 0x01f0-0x01f7]: not claimed; can't enable device ata_piix 0000:00:0a.1: probe with driver ata_piix failed with error -22 MIPS PCI code already has support for enforcing lower bounds using PCIBIOS_MIN_IO in pcibios_align_resource() without altering the IO window start address itself. Make malta PCI code too to use PCIBIOS_MIN_IO. Fixes: ae81aad5c2e1 ("MIPS: PCI: Use pci_enable_resources()") Reported-by: Guenter Roeck Link: https://lore.kernel.org/linux-pci/9085ab12-1559-4462-9b18-f03dcb9a4088@roeck-us.net/ Suggested-by: Maciej W. Rozycki Link: https://lore.kernel.org/linux-pci/alpine.DEB.2.21.2510132229120.39634@angie.orcam.me.uk/ Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Tested-by: Guenter Roeck Tested-by: Maciej W. Rozycki Acked-by: Thomas Bogendoerfer Link: https://patch.msgid.link/20251017110903.1973-1-ilpo.jarvinen@linux.intel.com --- arch/mips/pci/pci-malta.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/mips/pci/pci-malta.c b/arch/mips/pci/pci-malta.c index 6aefdf20ca05..2e35aeba45bc 100644 --- a/arch/mips/pci/pci-malta.c +++ b/arch/mips/pci/pci-malta.c @@ -230,8 +230,7 @@ void __init mips_pcibios_init(void) } /* PIIX4 ACPI starts at 0x1000 */ - if (controller->io_resource->start < 0x00001000UL) - controller->io_resource->start = 0x00001000UL; + PCIBIOS_MIN_IO = 0x1000; iomem_resource.end &= 0xfffffffffULL; /* 64 GB */ ioport_resource.end = controller->io_resource->end; From 8ac9b0d33e5c0a995338ee5f25fe1b6ff7d97f65 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Oct 2025 07:16:08 -0600 Subject: [PATCH 160/263] io_uring/sqpoll: switch away from getrusage() for CPU accounting getrusage() does a lot more than what the SQPOLL accounting needs, the latter only cares about (and uses) the stime. Rather than do a full RUSAGE_SELF summation, just query the used stime instead. Cc: stable@vger.kernel.org Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- io_uring/fdinfo.c | 8 ++++---- io_uring/sqpoll.c | 32 ++++++++++++++++++-------------- io_uring/sqpoll.h | 1 + 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index ff3364531c77..294c75a8a3bd 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { struct io_overflow_cqe *ocqe; struct io_rings *r = ctx->rings; - struct rusage sq_usage; unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; unsigned int sq_head = READ_ONCE(r->sq.head); unsigned int sq_tail = READ_ONCE(r->sq.tail); @@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) * thread termination. */ if (tsk) { + u64 usec; + get_task_struct(tsk); rcu_read_unlock(); - getrusage(tsk, RUSAGE_SELF, &sq_usage); + usec = io_sq_cpu_usec(tsk); put_task_struct(tsk); sq_pid = sq->task_pid; sq_cpu = sq->sq_cpu; - sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000 - + sq_usage.ru_stime.tv_usec); + sq_total_time = usec; sq_work_time = sq->work_time; } else { rcu_read_unlock(); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index a3f11349ce06..2b816fdb9866 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -169,6 +170,20 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd) return READ_ONCE(sqd->state); } +u64 io_sq_cpu_usec(struct task_struct *tsk) +{ + u64 utime, stime; + + task_cputime_adjusted(tsk, &utime, &stime); + do_div(stime, 1000); + return stime; +} + +static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) +{ + sqd->work_time += io_sq_cpu_usec(current) - usec; +} + static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) { unsigned int to_submit; @@ -255,26 +270,15 @@ static bool io_sq_tw_pending(struct llist_node *retry_list) return retry_list || !llist_empty(&tctx->task_list); } -static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start) -{ - struct rusage end; - - getrusage(current, RUSAGE_SELF, &end); - end.ru_stime.tv_sec -= start->ru_stime.tv_sec; - end.ru_stime.tv_usec -= start->ru_stime.tv_usec; - - sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000; -} - static int io_sq_thread(void *data) { struct llist_node *retry_list = NULL; struct io_sq_data *sqd = data; struct io_ring_ctx *ctx; - struct rusage start; unsigned long timeout = 0; char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); + u64 start; /* offload context creation failed, just exit */ if (!current->io_uring) { @@ -317,7 +321,7 @@ static int io_sq_thread(void *data) } cap_entries = !list_is_singular(&sqd->ctx_list); - getrusage(current, RUSAGE_SELF, &start); + start = io_sq_cpu_usec(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { int ret = __io_sq_thread(ctx, cap_entries); @@ -333,7 +337,7 @@ static int io_sq_thread(void *data) if (sqt_spin || !time_after(jiffies, timeout)) { if (sqt_spin) { - io_sq_update_worktime(sqd, &start); + io_sq_update_worktime(sqd, start); timeout = jiffies + sqd->sq_thread_idle; } if (unlikely(need_resched())) { diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h index b83dcdec9765..fd2f6f29b516 100644 --- a/io_uring/sqpoll.h +++ b/io_uring/sqpoll.h @@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd); void io_put_sq_data(struct io_sq_data *sqd); void io_sqpoll_wait_sq(struct io_ring_ctx *ctx); int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask); +u64 io_sq_cpu_usec(struct task_struct *tsk); static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd) { From a94e0657269c5b8e1a90b17aa2c048b3d276e16d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 21 Oct 2025 11:44:39 -0600 Subject: [PATCH 161/263] io_uring/sqpoll: be smarter on when to update the stime usage The current approach is a bit naive, and hence calls the time querying way too often. Only start the "doing work" timer when there's actual work to do, and then use that information to terminate (and account) the work time once done. This greatly reduces the frequency of these calls, when they cannot have changed anyway. Running a basic random reader that is setup to use SQPOLL, a profile before this change shows these as the top cycle consumers: + 32.60% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime_adjusted + 19.97% iou-sqp-1074 [kernel.kallsyms] [k] thread_group_cputime + 12.20% io_uring io_uring [.] submitter_uring_fn + 4.13% iou-sqp-1074 [kernel.kallsyms] [k] getrusage + 2.45% iou-sqp-1074 [kernel.kallsyms] [k] io_submit_sqes + 2.18% iou-sqp-1074 [kernel.kallsyms] [k] __pi_memset_generic + 2.09% iou-sqp-1074 [kernel.kallsyms] [k] cputime_adjust and after this change, top of profile looks as follows: + 36.23% io_uring io_uring [.] submitter_uring_fn + 23.26% iou-sqp-819 [kernel.kallsyms] [k] io_sq_thread + 10.14% iou-sqp-819 [kernel.kallsyms] [k] io_sq_tw + 6.52% iou-sqp-819 [kernel.kallsyms] [k] tctx_task_work_run + 4.82% iou-sqp-819 [kernel.kallsyms] [k] nvme_submit_cmds.part.0 + 2.91% iou-sqp-819 [kernel.kallsyms] [k] io_submit_sqes [...] 0.02% iou-sqp-819 [kernel.kallsyms] [k] cputime_adjust where it's spending the cycles on things that actually matter. Reported-by: Fengnan Chang Cc: stable@vger.kernel.org Fixes: 3fcb9d17206e ("io_uring/sqpoll: statistics of the true utilization of sq threads") Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 2b816fdb9866..e22f072c7d5f 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -170,6 +170,11 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd) return READ_ONCE(sqd->state); } +struct io_sq_time { + bool started; + u64 usec; +}; + u64 io_sq_cpu_usec(struct task_struct *tsk) { u64 utime, stime; @@ -179,12 +184,24 @@ u64 io_sq_cpu_usec(struct task_struct *tsk) return stime; } -static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec) +static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist) { - sqd->work_time += io_sq_cpu_usec(current) - usec; + if (!ist->started) + return; + ist->started = false; + sqd->work_time += io_sq_cpu_usec(current) - ist->usec; } -static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) +static void io_sq_start_worktime(struct io_sq_time *ist) +{ + if (ist->started) + return; + ist->started = true; + ist->usec = io_sq_cpu_usec(current); +} + +static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd, + bool cap_entries, struct io_sq_time *ist) { unsigned int to_submit; int ret = 0; @@ -197,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (to_submit || !wq_list_empty(&ctx->iopoll_list)) { const struct cred *creds = NULL; + io_sq_start_worktime(ist); + if (ctx->sq_creds != current_cred()) creds = override_creds(ctx->sq_creds); @@ -278,7 +297,6 @@ static int io_sq_thread(void *data) unsigned long timeout = 0; char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); - u64 start; /* offload context creation failed, just exit */ if (!current->io_uring) { @@ -313,6 +331,7 @@ static int io_sq_thread(void *data) mutex_lock(&sqd->lock); while (1) { bool cap_entries, sqt_spin = false; + struct io_sq_time ist = { }; if (io_sqd_events_pending(sqd) || signal_pending(current)) { if (io_sqd_handle_event(sqd)) @@ -321,9 +340,8 @@ static int io_sq_thread(void *data) } cap_entries = !list_is_singular(&sqd->ctx_list); - start = io_sq_cpu_usec(current); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - int ret = __io_sq_thread(ctx, cap_entries); + int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist); if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) sqt_spin = true; @@ -331,15 +349,18 @@ static int io_sq_thread(void *data) if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE)) sqt_spin = true; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - if (io_napi(ctx)) + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + if (io_napi(ctx)) { + io_sq_start_worktime(&ist); io_napi_sqpoll_busy_poll(ctx); + } + } + + io_sq_update_worktime(sqd, &ist); if (sqt_spin || !time_after(jiffies, timeout)) { - if (sqt_spin) { - io_sq_update_worktime(sqd, start); + if (sqt_spin) timeout = jiffies + sqd->sq_thread_idle; - } if (unlikely(need_resched())) { mutex_unlock(&sqd->lock); cond_resched(); From 915651b7c9473fd23d0e56fe227a97eda483cf7c Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Tue, 21 Oct 2025 22:59:30 +0530 Subject: [PATCH 162/263] io_uring: Fix code indentation error Fix the indentation to ensure consistent code style and improve readability and to fix the errors: ERROR: code indent should use tabs where possible + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);$ ERROR: code indent should use tabs where possible +^I^I^I struct io_big_cqe *big_cqe)$ Tested by running the /scripts/checkpatch.pl Signed-off-by: Ranganath V N Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 2 +- io_uring/net.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 820ef0527666..296667ba712c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags) } static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe, - struct io_big_cqe *big_cqe) + struct io_big_cqe *big_cqe) { struct io_overflow_cqe *ocqe; diff --git a/io_uring/net.c b/io_uring/net.c index f99b90c762fc..a95cc9ca2a4d 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; if (sr->flags & IORING_SEND_VECTORIZED) - return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); + return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE); return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } From 060aa0b0c26c9e88cfc1433fab3d0145700e8247 Mon Sep 17 00:00:00 2001 From: David Wei Date: Tue, 21 Oct 2025 13:29:44 -0700 Subject: [PATCH 163/263] io_uring zcrx: add MAINTAINERS entry Same as [1] but also with netdev@ as an additional mailing list. io_uring zero copy receive is of particular interest to netdev participants too, given its tight integration to netdev core. With this updated entry, folks running get_maintainer.pl on patches that touch io_uring/zcrx.* will know to send it to netdev@ as well. Note that this doesn't mean all changes require explicit acks from netdev; this is purely for wider visibility and for other contributors to know where to send patches. [1]: https://lore.kernel.org/io-uring/989528e611b51d71fb712691ebfb76d2059ba561.1755461246.git.asml.silence@gmail.com/ Signed-off-by: David Wei Acked-by: Jakub Kicinski Reviewed-by: Mina Almasry [axboe: use correct io_uring tree URL] Signed-off-by: Jens Axboe --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..2ed9efa2d2a5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13110,6 +13110,15 @@ F: include/uapi/linux/io_uring.h F: include/uapi/linux/io_uring/ F: io_uring/ +IO_URING ZCRX +M: Pavel Begunkov +L: io-uring@vger.kernel.org +L: netdev@vger.kernel.org +T: git https://github.com/isilence/linux.git zcrx/for-next +T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git +S: Maintained +F: io_uring/zcrx.* + IPMI SUBSYSTEM M: Corey Minyard L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers) From 1af424b15401d2be789c4dc2279889514e7c5c94 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Oct 2025 20:34:05 -0700 Subject: [PATCH 164/263] lib/crypto: poly1305: Restore dependency of arch code on !KMSAN Restore the dependency of the architecture-optimized Poly1305 code on !KMSAN. It was dropped by commit b646b782e522 ("lib/crypto: poly1305: Consolidate into single module"). Unlike the other hash algorithms in lib/crypto/ (e.g., SHA-512), the way the architecture-optimized Poly1305 code is integrated results in assembly code initializing memory, for several different architectures. Thus, it generates false positive KMSAN warnings. These could be suppressed with kmsan_unpoison_memory(), but it would be needed in quite a few places. For now let's just restore the dependency on !KMSAN. Note: this should have been caught by running poly1305_kunit with CONFIG_KMSAN=y, which I did. However, due to an unrelated KMSAN bug (https://lore.kernel.org/r/20251022030213.GA35717@sol/), KMSAN currently isn't working reliably. Thus, the warning wasn't noticed until later. Fixes: b646b782e522 ("lib/crypto: poly1305: Consolidate into single module") Reported-by: syzbot+01fcd39a0d90cdb0e3df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/68f6a48f.050a0220.91a22.0452.GAE@google.com/ Reported-by: Pei Xiao Closes: https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.1761026343.git.xiaopei01@kylinos.cn/ Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251022033405.64761-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index eea17e36a22b..8886055e938f 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -97,7 +97,7 @@ config CRYPTO_LIB_POLY1305 config CRYPTO_LIB_POLY1305_ARCH bool - depends on CRYPTO_LIB_POLY1305 && !UML + depends on CRYPTO_LIB_POLY1305 && !UML && !KMSAN default y if ARM default y if ARM64 && KERNEL_MODE_NEON default y if MIPS From 0bd73ae09ba1b73137d0830b21820d24700e09b1 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Fri, 17 Oct 2025 11:55:02 +0200 Subject: [PATCH 165/263] smb: server: allocate enough space for RW WRs and ib_drain_qp() Make use of rdma_rw_mr_factor() to calculate the number of rw credits and the number of pages per RDMA RW operation. We get the same numbers for iWarp connections, tested with siw.ko and irdma.ko (in iWarp mode). siw: CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 256 CIFS: max_sgl_rd=0, max_sge_rd=1 CIFS: responder_resources=32 max_frmr_depth=256 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 3276800 max_qp_wr 32768 ksmbd: max_fast_reg_page_list_len = 256, max_sgl_rd=0, max_sge_rd=1 ksmbd: device reporting max_cqe 3276800 max_qp_wr 32768 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048 ksmbd: Info: rdma_send_wr 27 + max_send_wr 256 = 283 irdma (in iWarp mode): CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144 CIFS: max_sgl_rd=0, max_sge_rd=13 CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13 ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048 ksmbd: rdma_send_wr 27 + max_send_wr 256 = 283 This means that we get the different correct numbers for ROCE, tested with rdma_rxe.ko and irdma.ko (in RoCEv2 mode). rxe: CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 512 CIFS: max_sgl_rd=0, max_sge_rd=32 CIFS: responder_resources=32 max_frmr_depth=512 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 32767 max_qp_wr 1048576 ksmbd: max_fast_reg_page_list_len = 512, max_sgl_rd=0, max_sge_rd=32 ksmbd: device reporting max_cqe 32767 max_qp_wr 1048576 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256 ksmbd: New sc->rw_io.credits: max = 65, num_pages = 32, maxpages=2048 ksmbd: rdma_send_wr 65 + max_send_wr 256 = 321 irdma (in RoCEv2 mode): CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144, CIFS: max_sgl_rd=0, max_sge_rd=13 CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0 CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13 ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063 ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256, ksmbd: New sc->rw_io.credits: max = 159, num_pages = 13, maxpages=2048 ksmbd: rdma_send_wr 159 + max_send_wr 256 = 415 And rely on rdma_rw_init_qp() to setup ib_mr_pool_init() for RW MRs. ib_mr_pool_destroy() will be called by rdma_rw_cleanup_mrs(). It seems the code was implemented before the rdma_rw_* layer was fully established in the kernel. While there also add additional space for ib_drain_qp(). This should make sure ib_post_send() will never fail because the submission queue is full. Fixes: ddbdc861e37c ("ksmbd: smbd: introduce read/write credits for RDMA read/write") Fixes: 4c564f03e23b ("smb: server: make use of common smbdirect_socket") Fixes: 177368b99243 ("smb: server: make use of common smbdirect_socket_parameters") Fixes: 95475d8886bd ("smb: server: make use smbdirect_socket.rw_io.credits") Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 233 ++++++++++++++++++++------------- 1 file changed, 142 insertions(+), 91 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index a201c5871a77..19b51205dc8c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -471,7 +471,6 @@ static void free_transport(struct smb_direct_transport *t) if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); - ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs); sc->ib.qp = NULL; rdma_destroy_qp(sc->rdma.cm_id); } @@ -1871,20 +1870,11 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) return ret; } -static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc) -{ - return min_t(unsigned int, - sc->ib.dev->attrs.max_fast_reg_page_list_len, - 256); -} - -static int smb_direct_init_params(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static int smb_direct_init_params(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; - struct ib_device *device = sc->ib.dev; - int max_send_sges, max_rw_wrs, max_send_wrs; - unsigned int max_sge_per_wr, wrs_per_credit; + int max_send_sges; + unsigned int maxpages; /* need 3 more sge. because a SMB_DIRECT header, SMB2 header, * SMB2 response could be mapped. @@ -1895,67 +1885,18 @@ static int smb_direct_init_params(struct smbdirect_socket *sc, return -EINVAL; } - /* Calculate the number of work requests for RDMA R/W. - * The maximum number of pages which can be registered - * with one Memory region can be transferred with one - * R/W credit. And at least 4 work requests for each credit - * are needed for MR registration, RDMA R/W, local & remote - * MR invalidation. - */ - sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc); - sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size, - (sc->rw_io.credits.num_pages - 1) * - PAGE_SIZE); - - max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge, - device->attrs.max_sge_rd); - max_sge_per_wr = max_t(unsigned int, max_sge_per_wr, - max_send_sges); - wrs_per_credit = max_t(unsigned int, 4, - DIV_ROUND_UP(sc->rw_io.credits.num_pages, - max_sge_per_wr) + 1); - max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit; - - max_send_wrs = sp->send_credit_target + max_rw_wrs; - if (max_send_wrs > device->attrs.max_cqe || - max_send_wrs > device->attrs.max_qp_wr) { - pr_err("consider lowering send_credit_target = %d\n", - sp->send_credit_target); - pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (sp->recv_credit_max > device->attrs.max_cqe || - sp->recv_credit_max > device->attrs.max_qp_wr) { - pr_err("consider lowering receive_credit_max = %d\n", - sp->recv_credit_max); - pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n", - device->attrs.max_cqe, device->attrs.max_qp_wr); - return -EINVAL; - } - - if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) { - pr_err("warning: device max_send_sge = %d too small\n", - device->attrs.max_send_sge); - return -EINVAL; - } - if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { - pr_err("warning: device max_recv_sge = %d too small\n", - device->attrs.max_recv_sge); - return -EINVAL; - } + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); + sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, + sc->rdma.cm_id->port_num, + maxpages); + sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max); + /* add one extra in order to handle unaligned pages */ + sc->rw_io.credits.max += 1; sc->recv_io.credits.target = 1; atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max); - cap->max_send_wr = max_send_wrs; - cap->max_recv_wr = sp->recv_credit_max; - cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; - cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; - cap->max_inline_data = 0; - cap->max_rdma_ctxs = sc->rw_io.credits.max; return 0; } @@ -2029,13 +1970,129 @@ static int smb_direct_create_pools(struct smbdirect_socket *sc) return -ENOMEM; } -static int smb_direct_create_qpair(struct smbdirect_socket *sc, - struct ib_qp_cap *cap) +static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr) +{ + /* + * This could be split out of rdma_rw_init_qp() + * and be a helper function next to rdma_rw_mr_factor() + * + * We can't check unlikely(rdma_rw_force_mr) here, + * but that is most likely 0 anyway. + */ + u32 factor; + + WARN_ON_ONCE(attr->port_num == 0); + + /* + * Each context needs at least one RDMA READ or WRITE WR. + * + * For some hardware we might need more, eventually we should ask the + * HCA driver for a multiplier here. + */ + factor = 1; + + /* + * If the device needs MRs to perform RDMA READ or WRITE operations, + * we'll need two additional MRs for the registrations and the + * invalidation. + */ + if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd) + factor += 2; /* inv + reg */ + + return factor * attr->cap.max_rdma_ctxs; +} + +static int smb_direct_create_qpair(struct smbdirect_socket *sc) { struct smbdirect_socket_parameters *sp = &sc->parameters; int ret; + struct ib_qp_cap qp_cap; struct ib_qp_init_attr qp_attr; - int pages_per_rw; + u32 max_send_wr; + u32 rdma_send_wr; + + /* + * Note that {rdma,ib}_create_qp() will call + * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0. + * It will adjust cap->max_send_wr to the required + * number of additional WRs for the RDMA RW operations. + * It will cap cap->max_send_wr to the device limit. + * + * +1 for ib_drain_qp + */ + qp_cap.max_send_wr = sp->send_credit_target + 1; + qp_cap.max_recv_wr = sp->recv_credit_max + 1; + qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; + qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE; + qp_cap.max_inline_data = 0; + qp_cap.max_rdma_ctxs = sc->rw_io.credits.max; + + /* + * Find out the number of max_send_wr + * after rdma_rw_init_qp() adjusted it. + * + * We only do it on a temporary variable, + * as rdma_create_qp() will trigger + * rdma_rw_init_qp() again. + */ + memset(&qp_attr, 0, sizeof(qp_attr)); + qp_attr.cap = qp_cap; + qp_attr.port_num = sc->rdma.cm_id->port_num; + rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr); + max_send_wr = qp_cap.max_send_wr + rdma_send_wr; + + if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_send_wr %d\n", + qp_cap.max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d\n", + sp->send_credit_target); + return -EINVAL; + } + + if (qp_cap.max_rdma_ctxs && + (max_send_wr >= sc->ib.dev->attrs.max_cqe || + max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) { + pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n", + rdma_send_wr, qp_cap.max_send_wr, max_send_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n", + sp->send_credit_target, qp_cap.max_rdma_ctxs); + return -EINVAL; + } + + if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe || + qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) { + pr_err("Possible CQE overrun: max_recv_wr %d\n", + qp_cap.max_recv_wr); + pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); + pr_err("consider lowering receive_credit_max = %d\n", + sp->recv_credit_max); + return -EINVAL; + } + + if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge || + qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) { + pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", + IB_DEVICE_NAME_MAX, + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); + return -EINVAL; + } sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); if (IS_ERR(sc->ib.pd)) { @@ -2046,8 +2103,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->send_credit_target + - cap->max_rdma_ctxs, + max_send_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.send_cq)) { pr_err("Can't create RDMA send CQ\n"); @@ -2057,7 +2113,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, } sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc, - sp->recv_credit_max, + qp_cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(sc->ib.recv_cq)) { pr_err("Can't create RDMA recv CQ\n"); @@ -2066,10 +2122,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, goto err; } + /* + * We reset completely here! + * As the above use was just temporary + * to calc max_send_wr and rdma_send_wr. + * + * rdma_create_qp() will trigger rdma_rw_init_qp() + * again if max_rdma_ctxs is not 0. + */ memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smb_direct_qpair_handler; qp_attr.qp_context = sc; - qp_attr.cap = *cap; + qp_attr.cap = qp_cap; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = sc->ib.send_cq; @@ -2085,18 +2149,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc, sc->ib.qp = sc->rdma.cm_id->qp; sc->rdma.cm_id->event_handler = smb_direct_cm_handler; - pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1; - if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) { - ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs, - sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG, - sc->rw_io.credits.num_pages, 0); - if (ret) { - pr_err("failed to init mr pool count %zu pages %zu\n", - sc->rw_io.credits.max, sc->rw_io.credits.num_pages); - goto err; - } - } - return 0; err: if (sc->ib.qp) { @@ -2183,10 +2235,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t) static int smb_direct_connect(struct smbdirect_socket *sc) { - struct ib_qp_cap qp_cap; int ret; - ret = smb_direct_init_params(sc, &qp_cap); + ret = smb_direct_init_params(sc); if (ret) { pr_err("Can't configure RDMA parameters\n"); return ret; @@ -2198,7 +2249,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc) return ret; } - ret = smb_direct_create_qpair(sc, &qp_cap); + ret = smb_direct_create_qpair(sc); if (ret) { pr_err("Can't accept RDMA client: %d\n", ret); return ret; From 68335cbcddcd586b59820e6d484652ad62343112 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:35:58 +0200 Subject: [PATCH 166/263] smb: smbdirect: introduce smbdirect_socket.send_io.lcredits.* This will be used to implement a logic in order to make sure we don't overflow the send submission queue for ib_post_send(). We will initialize the local credits with the fixed sp->send_credit_target value, which matches the reserved slots in the submission queue for ib_post_send(). We will be a local credit first and then wait for a remote credit, if we managed to get both we are allowed to post an IB_WR_SEND[_WITH_INV]. The local credit is given back to the pool when we get the local ib_post_send() completion, while remote credits are granted by the peer. From reading the git history of the linux smbdirect implementations in client and server) it was seen that a peer granted more credits than we requested. I guess that only happened because of bugs in our implementation which was active as client and server. I guess Windows won't do that. So the local credits make sure we only use the amount of credits we asked for. The client already has some logic for this based on smbdirect_socket.send_io.pending.count, but that counts in the order direction and makes it complex it share common logic for various credits classes. That logic will be replaced soon. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 361db7f9f623..ee5a90d691c8 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -142,7 +142,15 @@ struct smbdirect_socket { } mem; /* - * The credit state for the send side + * The local credit state for ib_post_send() + */ + struct { + atomic_t count; + wait_queue_head_t wait_queue; + } lcredits; + + /* + * The remote credit state for the send side */ struct { atomic_t count; @@ -337,6 +345,9 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc) INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work); disable_delayed_work_sync(&sc->idle.timer_work); + atomic_set(&sc->send_io.lcredits.count, 0); + init_waitqueue_head(&sc->send_io.lcredits.wait_queue); + atomic_set(&sc->send_io.credits.count, 0); init_waitqueue_head(&sc->send_io.credits.wait_queue); From 8059c64049587dac8af37ad82e2034b64c2d9fee Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:35:59 +0200 Subject: [PATCH 167/263] smb: server: smb_direct_disconnect_rdma_connection() already wakes all waiters on error There's no need to care about pending or credit counters when we already disconnecting. And all related wait_event conditions already check for broken connections too. This will simplify the code and makes the following changes simpler. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 19b51205dc8c..9dabaf74db31 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -987,8 +987,6 @@ static int smb_direct_post_send(struct smbdirect_socket *sc, ret = ib_post_send(sc->ib.qp, wr, NULL); if (ret) { pr_err("failed to post send: %d\n", ret); - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); smb_direct_disconnect_rdma_connection(sc); } return ret; @@ -1037,8 +1035,6 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, send_ctx->need_invalidate_rkey, send_ctx->remote_key); } else { - atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count); - wake_up(&sc->send_io.credits.wait_queue); list_for_each_entry_safe(first, last, &send_ctx->msg_list, sibling_list) { smb_direct_free_sendmsg(sc, first); From a90227462a14f5bdf7dfd4b73c2b75c54834efce Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:00 +0200 Subject: [PATCH 168/263] smb: server: simplify sibling_list handling in smb_direct_flush_send_list/send_done We have a list handling that is much easier to understand: 1. Before smb_direct_flush_send_list() is called all struct smbdirect_send_io messages are part of send_ctx->msg_list 2. Before smb_direct_flush_send_list() calls smb_direct_post_send() we remove the last element in send_ctx->msg_list and move all others into last->sibling_list. As only last has IB_SEND_SIGNALED and gets a completion vis send_done(). 3. send_done() has an easy way to free all others in sendmsg->sibling_list (if there are any). And use list_for_each_entry_safe() instead of a complex custom logic. This will help us to share send_done() in common code soon, as it will work fine for the client too, where last->sibling_list is currently always an empty list. Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 60 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 9dabaf74db31..2aa8e4d4c912 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -523,6 +523,12 @@ static void smb_direct_free_sendmsg(struct smbdirect_socket *sc, { int i; + /* + * The list needs to be empty! + * The caller should take care of it. + */ + WARN_ON_ONCE(!list_empty(&msg->sibling_list)); + if (msg->num_sge > 0) { ib_dma_unmap_single(sc->ib.dev, msg->sge[0].addr, msg->sge[0].length, @@ -908,9 +914,8 @@ static void smb_direct_post_recv_credits(struct work_struct *work) static void send_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbdirect_send_io *sendmsg, *sibling; + struct smbdirect_send_io *sendmsg, *sibling, *next; struct smbdirect_socket *sc; - struct list_head *pos, *prev, *end; sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); sc = sendmsg->socket; @@ -919,27 +924,26 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) ib_wc_status_msg(wc->status), wc->status, wc->opcode); + /* + * Free possible siblings and then the main send_io + */ + list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); + } + /* Note this frees wc->wr_cqe, but not wc */ + smb_direct_free_sendmsg(sc, sendmsg); + if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { pr_err("Send error. status='%s (%d)', opcode=%d\n", ib_wc_status_msg(wc->status), wc->status, wc->opcode); smb_direct_disconnect_rdma_connection(sc); + return; } if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); - - /* iterate and free the list of messages in reverse. the list's head - * is invalid. - */ - for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next; - prev != end; pos = prev, prev = prev->prev) { - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); - } - - sibling = container_of(pos, struct smbdirect_send_io, sibling_list); - smb_direct_free_sendmsg(sc, sibling); } static int manage_credits_prior_sending(struct smbdirect_socket *sc) @@ -1029,17 +1033,29 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc, last->wr.send_flags = IB_SEND_SIGNALED; last->wr.wr_cqe = &last->cqe; + /* + * Remove last from send_ctx->msg_list + * and splice the rest of send_ctx->msg_list + * to last->sibling_list. + * + * send_ctx->msg_list is a valid empty list + * at the end. + */ + list_del_init(&last->sibling_list); + list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list); + send_ctx->wr_cnt = 0; + ret = smb_direct_post_send(sc, &first->wr); - if (!ret) { - smb_direct_send_ctx_init(send_ctx, - send_ctx->need_invalidate_rkey, - send_ctx->remote_key); - } else { - list_for_each_entry_safe(first, last, &send_ctx->msg_list, - sibling_list) { - smb_direct_free_sendmsg(sc, first); + if (ret) { + struct smbdirect_send_io *sibling, *next; + + list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) { + list_del_init(&sibling->sibling_list); + smb_direct_free_sendmsg(sc, sibling); } + smb_direct_free_sendmsg(sc, last); } + return ret; } From 0158e864cca0c98bdc2866f1eb30c66fa21e250c Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:01 +0200 Subject: [PATCH 169/263] smb: server: make use of smbdirect_socket.send_io.lcredits.* This introduces logic to prevent on overflow of the send submission queue with ib_post_send() easier. As we first get a local credit and then a remote credit before we mark us as pending. From reading the git history of the linux smbdirect implementations in client and server) it was seen that a peer granted more credits than we requested. I guess that only happened because of bugs in our implementation which was active as client and server. I guess Windows won't do that. So the local credits make sure we only use the amount of credits we asked for. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 42 ++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 2aa8e4d4c912..8aaa950a9449 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -219,6 +219,7 @@ static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); wake_up_all(&sc->recv_io.reassembly.wait_queue); @@ -916,6 +917,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) { struct smbdirect_send_io *sendmsg, *sibling, *next; struct smbdirect_socket *sc; + int lcredits = 0; sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); sc = sendmsg->socket; @@ -930,9 +932,11 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) { list_del_init(&sibling->sibling_list); smb_direct_free_sendmsg(sc, sibling); + lcredits += 1; } /* Note this frees wc->wr_cqe, but not wc */ smb_direct_free_sendmsg(sc, sendmsg); + lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { pr_err("Send error. status='%s (%d)', opcode=%d\n", @@ -942,6 +946,9 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); } @@ -1081,6 +1088,23 @@ static int wait_for_credits(struct smbdirect_socket *sc, } while (true); } +static int wait_for_send_lcredit(struct smbdirect_socket *sc, + struct smbdirect_send_batch *send_ctx) +{ + if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) { + int ret; + + ret = smb_direct_flush_send_list(sc, send_ctx, false); + if (ret) + return ret; + } + + return wait_for_credits(sc, + &sc->send_io.lcredits.wait_queue, + &sc->send_io.lcredits.count, + 1); +} + static int wait_for_send_credits(struct smbdirect_socket *sc, struct smbdirect_send_batch *send_ctx) { @@ -1268,9 +1292,13 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, int data_length; struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1]; + ret = wait_for_send_lcredit(sc, send_ctx); + if (ret) + goto lcredit_failed; + ret = wait_for_send_credits(sc, send_ctx); if (ret) - return ret; + goto credit_failed; data_length = 0; for (i = 0; i < niov; i++) @@ -1278,10 +1306,8 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, ret = smb_direct_create_header(sc, data_length, remaining_data_length, &msg); - if (ret) { - atomic_inc(&sc->send_io.credits.count); - return ret; - } + if (ret) + goto header_failed; for (i = 0; i < niov; i++) { struct ib_sge *sge; @@ -1319,7 +1345,11 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc, return 0; err: smb_direct_free_sendmsg(sc, msg); +header_failed: atomic_inc(&sc->send_io.credits.count); +credit_failed: + atomic_inc(&sc->send_io.lcredits.count); +lcredit_failed: return ret; } @@ -1897,6 +1927,8 @@ static int smb_direct_init_params(struct smbdirect_socket *sc) return -EINVAL; } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); + maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE); sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev, sc->rdma.cm_id->port_num, From 123111ea6226c5302cc192028e7ae923c44e1382 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 20:36:02 +0200 Subject: [PATCH 170/263] smb: client: make use of smbdirect_socket.send_io.lcredits.* This makes the logic to prevent on overflow of the send submission queue with ib_post_send() easier. As we first get a local credit and then a remote credit before we mark us as pending. For now we'll keep the logic around smbdirect_socket.send_io.pending.*, but that will likely change or be removed completely. The server will get a similar logic soon, so we'll be able to share the send code in future. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 67 ++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 49e2df3ad1f0..f2da694336ee 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -172,6 +172,7 @@ static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) * in order to notice the broken connection. */ wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.lcredits.wait_queue); wake_up_all(&sc->send_io.credits.wait_queue); wake_up_all(&sc->send_io.pending.dec_wait_queue); wake_up_all(&sc->send_io.pending.zero_wait_queue); @@ -495,6 +496,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbdirect_send_io *request = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); struct smbdirect_socket *sc = request->socket; + int lcredits = 0; log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", request, ib_wc_status_msg(wc->status)); @@ -504,22 +506,24 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); + mempool_free(request, sc->send_io.mem.pool); + lcredits += 1; if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { if (wc->status != IB_WC_WR_FLUSH_ERR) log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", ib_wc_status_msg(wc->status), wc->opcode); - mempool_free(request, sc->send_io.mem.pool); smbd_disconnect_rdma_connection(sc); return; } + atomic_add(lcredits, &sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + if (atomic_dec_and_test(&sc->send_io.pending.count)) wake_up(&sc->send_io.pending.zero_wait_queue); wake_up(&sc->send_io.pending.dec_wait_queue); - - mempool_free(request, sc->send_io.mem.pool); } static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) @@ -567,6 +571,7 @@ static bool process_negotiation_response( log_rdma_event(ERR, "error: credits_granted==0\n"); return false; } + atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target); atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { @@ -1114,6 +1119,24 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, struct smbdirect_data_transfer *packet; int new_credits = 0; +wait_lcredit: + /* Wait for local send credits */ + rc = wait_event_interruptible(sc->send_io.lcredits.wait_queue, + atomic_read(&sc->send_io.lcredits.count) > 0 || + sc->status != SMBDIRECT_SOCKET_CONNECTED); + if (rc) + goto err_wait_lcredit; + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_outgoing(ERR, "disconnected not sending on wait_credit\n"); + rc = -EAGAIN; + goto err_wait_lcredit; + } + if (unlikely(atomic_dec_return(&sc->send_io.lcredits.count) < 0)) { + atomic_inc(&sc->send_io.lcredits.count); + goto wait_lcredit; + } + wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(sc->send_io.credits.wait_queue, @@ -1132,23 +1155,6 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, goto wait_credit; } -wait_send_queue: - wait_event(sc->send_io.pending.dec_wait_queue, - atomic_read(&sc->send_io.pending.count) < sp->send_credit_target || - sc->status != SMBDIRECT_SOCKET_CONNECTED); - - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); - rc = -EAGAIN; - goto err_wait_send_queue; - } - - if (unlikely(atomic_inc_return(&sc->send_io.pending.count) > - sp->send_credit_target)) { - atomic_dec(&sc->send_io.pending.count); - goto wait_send_queue; - } - request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL); if (!request) { rc = -ENOMEM; @@ -1229,10 +1235,21 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, le32_to_cpu(packet->data_length), le32_to_cpu(packet->remaining_data_length)); + /* + * Now that we got a local and a remote credit + * we add us as pending + */ + atomic_inc(&sc->send_io.pending.count); + rc = smbd_post_send(sc, request); if (!rc) return 0; + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); + + wake_up(&sc->send_io.pending.dec_wait_queue); + err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) @@ -1246,14 +1263,14 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc, atomic_sub(new_credits, &sc->recv_io.credits.count); err_alloc: - if (atomic_dec_and_test(&sc->send_io.pending.count)) - wake_up(&sc->send_io.pending.zero_wait_queue); - -err_wait_send_queue: - /* roll back send credits and pending */ atomic_inc(&sc->send_io.credits.count); + wake_up(&sc->send_io.credits.wait_queue); err_wait_credit: + atomic_inc(&sc->send_io.lcredits.count); + wake_up(&sc->send_io.lcredits.wait_queue); + +err_wait_lcredit: return rc; } From 5b2ff4873aeab972f919d5aea11c51393322bf58 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 09:40:02 +0100 Subject: [PATCH 171/263] cifs: Fix TCP_Server_Info::credits to be signed Fix TCP_Server_Info::credits to be signed, just as echo_credits and oplock_credits are. This also fixes what ought to get at least a compilation warning if not an outright error in *get_credits_field() as a pointer to the unsigned server->credits field is passed back as a pointer to a signed int. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Paulo Alcantara (Red Hat) Acked-by: Pavel Shilovskiy Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 16a00a61fd2c..00982aa9428f 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -732,7 +732,7 @@ struct TCP_Server_Info { bool nosharesock; bool tcp_nodelay; bool terminate; - unsigned int credits; /* send no more requests at once */ + int credits; /* send no more requests at once */ unsigned int max_credits; /* can override large 32000 default at mnt */ unsigned int in_flight; /* number of requests on the wire to server */ unsigned int max_in_flight; /* max number of requests that were on wire */ From e4a77f9c85a528b3289c1d9570d6d73a7b5f847b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Oct 2025 15:37:15 +0200 Subject: [PATCH 172/263] gpiolib: acpi: Make set debounce errors non fatal Commit 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO") adds a gpio_set_debounce_timeout() call to acpi_find_gpio() and makes acpi_find_gpio() fail if this fails. But gpio_set_debounce_timeout() failing is a somewhat normal occurrence, since not all debounce values are supported on all GPIO/pinctrl chips. Making this an error for example break getting the card-detect GPIO for the micro-sd slot found on many Bay Trail tablets, breaking support for the micro-sd slot on these tablets. acpi_request_own_gpiod() already treats gpio_set_debounce_timeout() failures as non-fatal, just warning about them. Add a acpi_gpio_set_debounce_timeout() helper which wraps gpio_set_debounce_timeout() and warns on failures and replace both existing gpio_set_debounce_timeout() calls with the helper. Since the helper only warns on failures this fixes the card-detect issue. Fixes: 16c07342b542 ("gpiolib: acpi: Program debounce when finding GPIO") Cc: stable@vger.kernel.org Cc: Mario Limonciello Signed-off-by: Hans de Goede Acked-by: Andy Shevchenko Link: https://lore.kernel.org/stable/20250920201200.20611-1-hansg%40kernel.org Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi-core.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 284e762d92c4..67c4c38afb86 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -291,6 +291,19 @@ acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity) return GPIOD_ASIS; } +static void acpi_gpio_set_debounce_timeout(struct gpio_desc *desc, + unsigned int acpi_debounce) +{ + int ret; + + /* ACPI uses hundredths of milliseconds units */ + acpi_debounce *= 10; + ret = gpio_set_debounce_timeout(desc, acpi_debounce); + if (ret) + gpiod_warn(desc, "Failed to set debounce-timeout %u: %d\n", + acpi_debounce, ret); +} + static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, struct acpi_resource_gpio *agpio, unsigned int index, @@ -300,18 +313,12 @@ static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity); unsigned int pin = agpio->pin_table[index]; struct gpio_desc *desc; - int ret; desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags); if (IS_ERR(desc)) return desc; - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10); - if (ret) - dev_warn(chip->parent, - "Failed to set debounce-timeout for pin 0x%04X, err %d\n", - pin, ret); + acpi_gpio_set_debounce_timeout(desc, agpio->debounce_timeout); return desc; } @@ -944,7 +951,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, bool can_fallback = acpi_can_fallback_to_crs(adev, con_id); struct acpi_gpio_info info = {}; struct gpio_desc *desc; - int ret; desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info); if (IS_ERR(desc)) @@ -959,10 +965,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, acpi_gpio_update_gpiod_flags(dflags, &info); acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info); - /* ACPI uses hundredths of milliseconds units */ - ret = gpio_set_debounce_timeout(desc, info.debounce * 10); - if (ret) - return ERR_PTR(ret); + acpi_gpio_set_debounce_timeout(desc, info.debounce); return desc; } From b1055678a0160b2952c322ad1b61805562698f99 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 23 Oct 2025 08:39:58 +0200 Subject: [PATCH 173/263] gpiolib: acpi: Use %pe when passing an error pointer to dev_err() One of the coccinelle recipe suggests to use %pe when we deal with an error pointer. Do it so. Reported-by: kernel test robot Reported-by: Julia Lawall Closes: https://lore.kernel.org/r/202510231350.calxvXIm-lkp@intel.com/ Signed-off-by: Andy Shevchenko --- drivers/gpio/gpiolib-acpi-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi-core.c b/drivers/gpio/gpiolib-acpi-core.c index 67c4c38afb86..d441c1236d8c 100644 --- a/drivers/gpio/gpiolib-acpi-core.c +++ b/drivers/gpio/gpiolib-acpi-core.c @@ -382,8 +382,8 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->parent, - "Failed to request GPIO for pin 0x%04X, err %ld\n", - pin, PTR_ERR(desc)); + "Failed to request GPIO for pin 0x%04X, err %pe\n", + pin, desc); return AE_OK; } From 72ed55b4c335703c203b942972558173e1e5ddee Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Wed, 22 Oct 2025 21:11:01 -0300 Subject: [PATCH 174/263] smb: client: get rid of d_drop() in cifs_do_rename() There is no need to force a lookup by unhashing the moved dentry after successfully renaming the file on server. The file metadata will be re-fetched from server, if necessary, in the next call to ->d_revalidate() anyways. Signed-off-by: Paulo Alcantara (Red Hat) Reviewed-by: David Howells Cc: stable@vger.kernel.org Cc: linux-cifs@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 098a79b7a959..cac355364e43 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2484,11 +2484,8 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry, } #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */ do_rename_exit: - if (rc == 0) { + if (rc == 0) d_move(from_dentry, to_dentry); - /* Force a new lookup */ - d_drop(from_dentry); - } cifs_put_tlink(tlink); return rc; } From 4b1d7f62225a2fd024b2df5675515557169f17e7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Oct 2025 18:10:08 +0100 Subject: [PATCH 175/263] cifs: Call the calc_signature functions directly As the SMB1 and SMB2/3 calc_signature functions are called from separate sign and verify paths, just call them directly rather than using a function pointer. The SMB3 calc_signature then jumps to the SMB2 variant if necessary. Signed-off-by: David Howells Acked-by: Enzo Matsumiya cc: Paulo Alcantara cc: Shyam Prasad N cc: Tom Talpey cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 2 -- fs/smb/client/smb2ops.c | 4 ---- fs/smb/client/smb2proto.h | 6 ------ fs/smb/client/smb2transport.c | 18 +++++++++--------- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 00982aa9428f..203e2aaa3c25 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -534,8 +534,6 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *); int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); - int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, - bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 95cd484cfbba..0f9130ef2e7d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -5446,7 +5446,6 @@ struct smb_version_operations smb20_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb2_is_read_op, .set_oplock_level = smb2_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5550,7 +5549,6 @@ struct smb_version_operations smb21_operations = { .get_lease_key = smb2_get_lease_key, .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, - .calc_signature = smb2_calc_signature, .is_read_op = smb21_is_read_op, .set_oplock_level = smb21_set_oplock_level, .create_lease_buf = smb2_create_lease_buf, @@ -5660,7 +5658,6 @@ struct smb_version_operations smb30_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb30signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, @@ -5777,7 +5774,6 @@ struct smb_version_operations smb311_operations = { .set_lease_key = smb2_set_lease_key, .new_lease_key = smb2_new_lease_key, .generate_signingkey = generate_smb311signingkey, - .calc_signature = smb3_calc_signature, .set_integrity = smb3_set_integrity, .is_read_op = smb21_is_read_op, .set_oplock_level = smb3_set_oplock_level, diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 6eb86d134abc..5241daaae543 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -39,12 +39,6 @@ extern struct mid_q_entry *smb2_setup_async_request( struct TCP_Server_Info *server, struct smb_rqst *rqst); extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid); -extern int smb2_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); -extern int smb3_calc_signature(struct smb_rqst *rqst, - struct TCP_Server_Info *server, - bool allocate_crypto); extern void smb2_echo_request(struct work_struct *work); extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode); extern bool smb2_is_valid_oplock_break(char *buffer, diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index ad6068e17a2a..6a9b80385b86 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -209,9 +209,9 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) return tcon; } -int +static int smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb2_signature[SMB2_HMACSHA256_SIZE]; @@ -465,9 +465,9 @@ generate_smb311signingkey(struct cifs_ses *ses, return generate_smb3signingkey(ses, server, &triplet); } -int +static int smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, - bool allocate_crypto) + bool allocate_crypto) { int rc; unsigned char smb3_signature[SMB2_CMACAES_SIZE]; @@ -477,6 +477,9 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, struct smb_rqst drqst; u8 key[SMB3_SIGN_KEY_SIZE]; + if (server->vals->protocol_id <= SMB21_PROT_ID) + return smb2_calc_signature(rqst, server, allocate_crypto); + rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key); if (unlikely(rc)) { cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__); @@ -547,7 +550,6 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server, static int smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) { - int rc = 0; struct smb2_hdr *shdr; struct smb2_sess_setup_req *ssr; bool is_binding; @@ -574,9 +576,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server) return 0; } - rc = server->ops->calc_signature(rqst, server, false); - - return rc; + return smb3_calc_signature(rqst, server, false); } int @@ -612,7 +612,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server) memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE); - rc = server->ops->calc_signature(rqst, server, true); + rc = smb3_calc_signature(rqst, server, true); if (rc) return rc; From 64c9471aa9ded2440bf182b1c71d3f93f80b2f85 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 20 Oct 2025 10:16:07 +0100 Subject: [PATCH 176/263] cifs: #include cifsglob.h before trace.h to allow structs in tracepoints Make cifs #include cifsglob.h in advance of #including trace.h so that the structures defined in cifsglob.h can be accessed directly by the cifs tracepoints rather than the callers having to manually pass in the bits and pieces. This should allow the tracepoints to be made more efficient to use as well as easier to read in the code. Signed-off-by: David Howells cc: Paulo Alcantara cc: linux-cifs@vger.kernel.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsproto.h | 1 + fs/smb/client/trace.c | 1 + 2 files changed, 2 insertions(+) diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index 4976be2c47c1..fb1813cbe0eb 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -9,6 +9,7 @@ #define _CIFSPROTO_H #include #include +#include "cifsglob.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" diff --git a/fs/smb/client/trace.c b/fs/smb/client/trace.c index 465483787193..16b0e719731f 100644 --- a/fs/smb/client/trace.c +++ b/fs/smb/client/trace.c @@ -4,5 +4,6 @@ * * Author(s): Steve French */ +#include "cifsglob.h" #define CREATE_TRACE_POINTS #include "trace.h" From 622865c73ae30f254abdf182f4b66cccbe3e0f10 Mon Sep 17 00:00:00 2001 From: LI Qingwu Date: Thu, 23 Oct 2025 03:44:22 +0000 Subject: [PATCH 177/263] USB: serial: option: add Telit FN920C04 ECM compositions Add support for the Telit Cinterion FN920C04 module when operating in ECM (Ethernet Control Model) mode. The following USB product IDs are used by the module when AT#USBCFG is set to 3 or 7. 0x10A3: ECM + tty (NMEA) + tty (DUN) [+ tty (DIAG)] T: Bus=01 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a3 Rev= 5.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=76e7cb38 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=60 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms 0x10A8: ECM + tty (DUN) + tty (AUX) [+ tty (DIAG)] T: Bus=03 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1bc7 ProdID=10a8 Rev= 5.15 S: Manufacturer=Telit Cinterion S: Product=FN920 S: SerialNumber=76e7cb38 C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=40 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=option E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Adding these IDs allows the option driver to automatically create the corresponding /dev/ttyUSB* ports under ECM mode. Tested with FN920C04 under ECM configuration (USBCFG=3 and 7). Signed-off-by: LI Qingwu Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3d6ebe2692a9..5de856f65f0d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1403,10 +1403,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a2, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a3, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a4, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a7, 0xff), /* Telit FN920C04 (MBIM) */ .driver_info = NCTRL(4) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a8, 0xff), /* Telit FN920C04 (ECM) */ + .driver_info = NCTRL(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10a9, 0xff), /* Telit FN20C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10aa, 0xff), /* Telit FN920C04 (MBIM) */ From 4c4e6ea4a120cc5ab58e437c6ba123cbfc357d45 Mon Sep 17 00:00:00 2001 From: Haotian Zhang Date: Thu, 23 Oct 2025 15:02:30 +0800 Subject: [PATCH 178/263] gpio: ljca: Fix duplicated IRQ mapping The generic_handle_domain_irq() function resolves the hardware IRQ internally. The driver performed a duplicative mapping by calling irq_find_mapping() first, which could lead to an RCU stall. Delete the redundant irq_find_mapping() call and pass the hardware IRQ directly to generic_handle_domain_irq(). Fixes: c5a4b6fd31e8 ("gpio: Add support for Intel LJCA USB GPIO driver") Signed-off-by: Haotian Zhang Link: https://lore.kernel.org/r/20251023070231.1305-1-vulab@iscas.ac.cn [Bartosz: remove unused variable] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-ljca.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-ljca.c b/drivers/gpio/gpio-ljca.c index 3b4f8830c741..f32d1d237795 100644 --- a/drivers/gpio/gpio-ljca.c +++ b/drivers/gpio/gpio-ljca.c @@ -286,22 +286,14 @@ static void ljca_gpio_event_cb(void *context, u8 cmd, const void *evt_data, { const struct ljca_gpio_packet *packet = evt_data; struct ljca_gpio_dev *ljca_gpio = context; - int i, irq; + int i; if (cmd != LJCA_GPIO_INT_EVENT) return; for (i = 0; i < packet->num; i++) { - irq = irq_find_mapping(ljca_gpio->gc.irq.domain, - packet->item[i].index); - if (!irq) { - dev_err(ljca_gpio->gc.parent, - "gpio_id %u does not mapped to IRQ yet\n", - packet->item[i].index); - return; - } - - generic_handle_domain_irq(ljca_gpio->gc.irq.domain, irq); + generic_handle_domain_irq(ljca_gpio->gc.irq.domain, + packet->item[i].index); set_bit(packet->item[i].index, ljca_gpio->reenable_irqs); } From c5efc6a0b3940381d67887302ddb87a5cf623685 Mon Sep 17 00:00:00 2001 From: Alok Tiwari Date: Thu, 23 Oct 2025 04:55:24 -0700 Subject: [PATCH 179/263] io_uring: correct __must_hold annotation in io_install_fixed_file The __must_hold annotation references &req->ctx->uring_lock, but req is not in scope in io_install_fixed_file. This change updates the annotation to reference the correct ctx->uring_lock. improving code clarity. Fixes: f110ed8498af ("io_uring: split out fixed file installation and removal") Signed-off-by: Alok Tiwari Signed-off-by: Jens Axboe --- io_uring/filetable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/filetable.c b/io_uring/filetable.c index a21660e3145a..794ef95df293 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table) static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, u32 slot_index) - __must_hold(&req->ctx->uring_lock) + __must_hold(&ctx->uring_lock) { struct io_rsrc_node *node; From eecd7cb64178efb35f89aa5134cf6ce36c0c66db Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 23 Oct 2025 14:01:07 +0200 Subject: [PATCH 180/263] slab: fix slab accounting imbalance due to defer_deactivate_slab() Since commit af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") there's a possibility in alloc_single_from_new_slab() that we discard the newly allocated slab if we can't spin and we fail to trylock. As a result we don't perform inc_slabs_node() later in the function. Instead we perform a deferred deactivate_slab() which can either put the unacounted slab on partial list, or discard it immediately while performing dec_slabs_node(). Either way will cause an accounting imbalance. Fix this by not marking the slab as frozen, and using free_slab() instead of deactivate_slab() for non-frozen slabs in free_deferred_objects(). For CONFIG_SLUB_TINY, that's the only possible case. By not using discard_slab() we avoid dec_slabs_node(). Fixes: af92793e52c3 ("slab: Introduce kmalloc_nolock() and kfree_nolock().") Link: https://patch.msgid.link/20251023-fix-slab-accounting-v2-1-0e62d50986ea@suse.cz Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 23d8f54e9486..87a1d2f9de0d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3422,7 +3422,6 @@ static void *alloc_single_from_new_slab(struct kmem_cache *s, struct slab *slab, if (!allow_spin && !spin_trylock_irqsave(&n->list_lock, flags)) { /* Unlucky, discard newly allocated slab */ - slab->frozen = 1; defer_deactivate_slab(slab, NULL); return NULL; } @@ -6471,9 +6470,12 @@ static void free_deferred_objects(struct irq_work *work) struct slab *slab = container_of(pos, struct slab, llnode); #ifdef CONFIG_SLUB_TINY - discard_slab(slab->slab_cache, slab); + free_slab(slab->slab_cache, slab); #else - deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + if (slab->frozen) + deactivate_slab(slab->slab_cache, slab, slab->flush_freelist); + else + free_slab(slab->slab_cache, slab); #endif } } From b98c94eed4a975e0c80b7e90a649a46967376f58 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 22 Oct 2025 11:09:14 +0100 Subject: [PATCH 181/263] arm64: mte: Do not warn if the page is already tagged in copy_highpage() The arm64 copy_highpage() assumes that the destination page is newly allocated and not MTE-tagged (PG_mte_tagged unset) and warns accordingly. However, following commit 060913999d7a ("mm: migrate: support poisoned recover from migrate folio"), folio_mc_copy() is called before __folio_migrate_mapping(). If the latter fails (-EAGAIN), the copy will be done again to the same destination page. Since copy_highpage() already set the PG_mte_tagged flag, this second copy will warn. Replace the WARN_ON_ONCE(page already tagged) in the arm64 copy_highpage() with a comment. Reported-by: syzbot+d1974fc28545a3e6218b@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/68dda1ae.a00a0220.102ee.0065.GAE@google.com Reviewed-by: David Hildenbrand Cc: Will Deacon Cc: Kefeng Wang Cc: stable@vger.kernel.org # 6.12.x Reviewed-by: Yang Shi Signed-off-by: Catalin Marinas --- arch/arm64/mm/copypage.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index a86c897017df..cd5912ba617b 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -35,7 +35,7 @@ void copy_highpage(struct page *to, struct page *from) from != folio_page(src, 0)) return; - WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + folio_try_hugetlb_mte_tagging(dst); /* * Populate tags for all subpages. @@ -51,8 +51,13 @@ void copy_highpage(struct page *to, struct page *from) } folio_set_hugetlb_mte_tagged(dst); } else if (page_mte_tagged(from)) { - /* It's a new page, shouldn't have been tagged yet */ - WARN_ON_ONCE(!try_page_mte_tagging(to)); + /* + * Most of the time it's a new page that shouldn't have been + * tagged yet. However, folio migration can end up reusing the + * same page without untagging it. Ignore the warning if the + * page is already tagged. + */ + try_page_mte_tagging(to); mte_copy_page_tags(kto, kfrom); set_page_mte_tagged(to); From 6fab32bb6508abbb8b7b1c5498e44f0c32320ed5 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 22 Oct 2025 16:36:25 +1100 Subject: [PATCH 182/263] MAINTAINERS: add Mark Brown as a linux-next maintainer Mark has been kindly helping fill in when I have been unavailable over the past several years. He has also put his hand up to take over linux-next maintenance when I finally decide to stop (which may be some time yet ;-) ). Signed-off-by: Stephen Rothwell Acked-by: Mark Brown Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 25463fa36508..5889df9de210 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14395,6 +14395,7 @@ F: tools/memory-model/ LINUX-NEXT TREE M: Stephen Rothwell +M: Mark Brown L: linux-next@vger.kernel.org S: Supported B: mailto:linux-next@vger.kernel.org and the appropriate development tree From 420c84c330d1688b8c764479e5738bbdbf0a33de Mon Sep 17 00:00:00 2001 From: Lizhi Xu Date: Wed, 22 Oct 2025 10:40:07 +0800 Subject: [PATCH 183/263] usbnet: Prevents free active kevent The root cause of this issue are: 1. When probing the usbnet device, executing usbnet_link_change(dev, 0, 0); put the kevent work in global workqueue. However, the kevent has not yet been scheduled when the usbnet device is unregistered. Therefore, executing free_netdev() results in the "free active object (kevent)" error reported here. 2. Another factor is that when calling usbnet_disconnect()->unregister_netdev(), if the usbnet device is up, ndo_stop() is executed to cancel the kevent. However, because the device is not up, ndo_stop() is not executed. The solution to this problem is to cancel the kevent before executing free_netdev(). Fixes: a69e617e533e ("usbnet: Fix linkwatch use-after-free on disconnect") Reported-by: Sam Sun Closes: https://syzkaller.appspot.com/bug?extid=8bfd7bcc98f7300afb84 Signed-off-by: Lizhi Xu Link: https://patch.msgid.link/20251022024007.1831898-1-lizhi.xu@windriver.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/usbnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index bf01f2728531..697cd9d866d3 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1659,6 +1659,8 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); + cancel_work_sync(&dev->kevent); + while ((urb = usb_get_from_anchor(&dev->deferred))) { dev_kfree_skb(urb->context); kfree(urb->sg); From 1ab665817448c31f4758dce43c455bd4c5e460aa Mon Sep 17 00:00:00 2001 From: Bui Quang Minh Date: Wed, 22 Oct 2025 22:56:30 +0700 Subject: [PATCH 184/263] virtio-net: drop the multi-buffer XDP packet in zerocopy In virtio-net, we have not yet supported multi-buffer XDP packet in zerocopy mode when there is a binding XDP program. However, in that case, when receiving multi-buffer XDP packet, we skip the XDP program and return XDP_PASS. As a result, the packet is passed to normal network stack which is an incorrect behavior (e.g. a XDP program for packet count is installed, multi-buffer XDP packet arrives and does go through XDP program. As a result, the packet count does not increase but the packet is still received from network stack).This commit instead returns XDP_ABORTED in that case. Fixes: 99c861b44eb1 ("virtio_net: xsk: rx: support recv merge mode") Cc: stable@vger.kernel.org Acked-by: Jason Wang Reviewed-by: Xuan Zhuo Signed-off-by: Bui Quang Minh Link: https://patch.msgid.link/20251022155630.49272-1-minhquangbui99@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/virtio_net.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a757cbcab87f..8e8a179aaa49 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1379,9 +1379,14 @@ static struct sk_buff *virtnet_receive_xsk_merge(struct net_device *dev, struct ret = XDP_PASS; rcu_read_lock(); prog = rcu_dereference(rq->xdp_prog); - /* TODO: support multi buffer. */ - if (prog && num_buf == 1) - ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, stats); + if (prog) { + /* TODO: support multi buffer. */ + if (num_buf == 1) + ret = virtnet_xdp_handler(prog, xdp, dev, xdp_xmit, + stats); + else + ret = XDP_ABORTED; + } rcu_read_unlock(); switch (ret) { From 246aca5b2a2c4ad3e75c2eff616f5532019a92d2 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 23 Oct 2025 19:43:49 +0900 Subject: [PATCH 185/263] firewire: core: fix __must_hold() annotation The variable name passed to __must_hold() annotation is invalid. This commit fixes it. Fixes: 420bd7068cbf ("firewire: core: use spin lock specific to transaction") Link: https://lore.kernel.org/r/20251023104349.415310-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index dd3656a0c1ff..c65f491c54d0 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -269,7 +269,7 @@ static void fw_fill_request(struct fw_packet *packet, int tcode, int tlabel, } static int allocate_tlabel(struct fw_card *card) -__must_hold(&card->transactions_lock) +__must_hold(&card->transactions.lock) { int tlabel; From df5192d9bb0e38bf831fb93e8026e346aa017ca8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 23 Oct 2025 13:06:26 -0500 Subject: [PATCH 186/263] PCI/ASPM: Enable only L0s and L1 for devicetree platforms f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") enabled Clock Power Management and L1 PM Substates, but those features depend on CLKREQ# and possibly other device-specific configuration. We don't know whether CLKREQ# is supported, so we shouldn't blindly enable Clock PM and L1 PM Substates. Enable only ASPM L0s and L1, and only when both ends of the link advertise support for them. Fixes: f3ac2ff14834 ("PCI/ASPM: Enable all ClockPM and ASPM states for devicetree platforms") Reported-by: Christian Zigotzky Link: https://lore.kernel.org/r/db5c95a1-cf3e-46f9-8045-a1b04908051a@xenosoft.de/ Reported-by: FUKAUMI Naoki Closes: https://lore.kernel.org/r/22594781424C5C98+22cb5d61-19b1-4353-9818-3bb2b311da0b@radxa.com/ Reported-by: Herve Codina Link: https://lore.kernel.org/r/20251015101304.3ec03e6b@bootlin.com/ Reported-by: Diederik de Haas Closes: https://lore.kernel.org/r/DDJXHRIRGTW9.GYC2ULZ5WQAL@cknow-tech.com/ Signed-off-by: Bjorn Helgaas Tested-by: FUKAUMI Naoki Tested-by: Diederik de Haas Acked-by: Dragan Simic Link: https://patch.msgid.link/20251023180645.1304701-1-helgaas@kernel.org --- drivers/pci/pcie/aspm.c | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 7cc8281e7011..79b965158473 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -243,8 +243,7 @@ struct pcie_link_state { /* Clock PM state */ u32 clkpm_capable:1; /* Clock PM capable? */ u32 clkpm_enabled:1; /* Current Clock PM state */ - u32 clkpm_default:1; /* Default Clock PM state by BIOS or - override */ + u32 clkpm_default:1; /* Default Clock PM state by BIOS */ u32 clkpm_disable:1; /* Clock PM disabled */ }; @@ -376,18 +375,6 @@ static void pcie_set_clkpm(struct pcie_link_state *link, int enable) pcie_set_clkpm_nocheck(link, enable); } -static void pcie_clkpm_override_default_link_state(struct pcie_link_state *link, - int enabled) -{ - struct pci_dev *pdev = link->downstream; - - /* For devicetree platforms, enable ClockPM by default */ - if (of_have_populated_dt() && !enabled) { - link->clkpm_default = 1; - pci_info(pdev, "ASPM: DT platform, enabling ClockPM\n"); - } -} - static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) { int capable = 1, enabled = 1; @@ -410,7 +397,6 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) } link->clkpm_enabled = enabled; link->clkpm_default = enabled; - pcie_clkpm_override_default_link_state(link, enabled); link->clkpm_capable = capable; link->clkpm_disable = blacklist ? 1 : 0; } @@ -811,19 +797,17 @@ static void pcie_aspm_override_default_link_state(struct pcie_link_state *link) struct pci_dev *pdev = link->downstream; u32 override; - /* For devicetree platforms, enable all ASPM states by default */ + /* For devicetree platforms, enable L0s and L1 by default */ if (of_have_populated_dt()) { - link->aspm_default = PCIE_LINK_STATE_ASPM_ALL; + if (link->aspm_support & PCIE_LINK_STATE_L0S) + link->aspm_default |= PCIE_LINK_STATE_L0S; + if (link->aspm_support & PCIE_LINK_STATE_L1) + link->aspm_default |= PCIE_LINK_STATE_L1; override = link->aspm_default & ~link->aspm_enabled; if (override) - pci_info(pdev, "ASPM: DT platform, enabling%s%s%s%s%s%s%s\n", - FLAG(override, L0S_UP, " L0s-up"), - FLAG(override, L0S_DW, " L0s-dw"), - FLAG(override, L1, " L1"), - FLAG(override, L1_1, " ASPM-L1.1"), - FLAG(override, L1_2, " ASPM-L1.2"), - FLAG(override, L1_1_PCIPM, " PCI-PM-L1.1"), - FLAG(override, L1_2_PCIPM, " PCI-PM-L1.2")); + pci_info(pdev, "ASPM: default states%s%s\n", + FLAG(override, L0S, " L0s"), + FLAG(override, L1, " L1")); } } From 6f1cbf6d6fd13fc169dde14e865897924cdc4bbd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 24 Oct 2025 09:34:59 +0800 Subject: [PATCH 187/263] io_uring: fix buffer auto-commit for multishot uring_cmd Commit 620a50c92700 ("io_uring: uring_cmd: add multishot support") added multishot uring_cmd support with explicit buffer upfront commit via io_uring_mshot_cmd_post_cqe(). However, the buffer selection path in io_ring_buffer_select() was auto-committing buffers for non-pollable files, which conflicts with uring_cmd's explicit upfront commit model. This way consumes the whole selected buffer immediately, and causes failure on the following buffer selection. Fix this by checking uring_cmd to identify operations that handle buffer commit explicitly, and skip auto-commit for these operations. Cc: Caleb Sander Mateos Fixes: 620a50c92700 ("io_uring: uring_cmd: add multishot support") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index aad655e38672..a727e020fe03 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, return 1; } +static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags) +{ + /* + * If we came in unlocked, we have no choice but to consume the + * buffer here, otherwise nothing ensures that the buffer won't + * get used by others. This does mean it'll be pinned until the + * IO completes, coming in unlocked means we're being called from + * io-wq context and there may be further retries in async hybrid + * mode. For the locked case, the caller must call commit when + * the transfer completes (or if we get -EAGAIN and must poll of + * retry). + */ + if (issue_flags & IO_URING_F_UNLOCKED) + return true; + + /* uring_cmd commits kbuf upfront, no need to auto-commit */ + if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD) + return true; + return false; +} + static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) @@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, sel.buf_list = bl; sel.addr = u64_to_user_ptr(buf->addr); - if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { - /* - * If we came in unlocked, we have no choice but to consume the - * buffer here, otherwise nothing ensures that the buffer won't - * get used by others. This does mean it'll be pinned until the - * IO completes, coming in unlocked means we're being called from - * io-wq context and there may be further retries in async hybrid - * mode. For the locked case, the caller must call commit when - * the transfer completes (or if we get -EAGAIN and must poll of - * retry). - */ + if (io_should_commit(req, issue_flags)) { io_kbuf_commit(req, sel.buf_list, *len, 1); sel.buf_list = NULL; } From dd6940f5c7dbee7ae70708f4c8967c3c8cb1d965 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 15 Oct 2025 17:05:27 +0200 Subject: [PATCH 188/263] smb: server: let free_transport() wait for SMBDIRECT_SOCKET_DISCONNECTED We should wait for the rdma_cm to become SMBDIRECT_SOCKET_DISCONNECTED! At least on the client side (with similar code) wait_event_interruptible() often returns with -ERESTARTSYS instead of waiting for SMBDIRECT_SOCKET_DISCONNECTED. We should use wait_event() here too, which makes the code be identical in client and server, which will help when moving to common functions. Fixes: b31606097de8 ("smb: server: move smb_direct_disconnect_rdma_work() into free_transport()") Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8aaa950a9449..89b02efdba0c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -451,11 +451,10 @@ static void free_transport(struct smb_direct_transport *t) struct smbdirect_recv_io *recvmsg; disable_work_sync(&sc->disconnect_work); - if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) smb_direct_disconnect_rdma_work(&sc->disconnect_work); - wait_event_interruptible(sc->status_wait, - sc->status == SMBDIRECT_SOCKET_DISCONNECTED); - } + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); /* * Wake up all waiters in all wait queues From ce29214ada6d08dbde1eeb5a69c3b09ddf3da146 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 21 Oct 2025 17:55:36 -0700 Subject: [PATCH 189/263] drm/xe: Check return value of GGTT workqueue allocation Workqueue allocation can fail, so check the return value of the GGTT workqueue allocation and fail driver initialization if the allocation fails. Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs") Cc: stable@vger.kernel.org Signed-off-by: Matthew Brost Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20251022005538.828980-2-matthew.brost@intel.com (cherry picked from commit 1f1314e8e71385bae319e43082b798c11f6648bc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_ggtt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 7fdd0a97a628..5edc0cad47e2 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -292,6 +292,9 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) ggtt->pt_ops = &xelp_pt_ops; ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM); + if (!ggtt->wq) + return -ENOMEM; + __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe)); err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt); From 5d7e45dd670e42df4836afeaa9baf9d41ca4b434 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:48:59 +0100 Subject: [PATCH 190/263] genirq/chip: Add buslock back in to irq_set_handler() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: 5cd05f3e2315 ("genirq/chip: Rework irq_set_handler() variants") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-2-ckeepax@opensource.cirrus.com --- kernel/irq/chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 3ffa0d80ddd1..d1917b28761a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1030,7 +1030,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, const char *name) { - scoped_irqdesc_get_and_lock(irq, 0) + scoped_irqdesc_get_and_buslock(irq, 0) __irq_do_set_handler(scoped_irqdesc, handle, is_chained, name); } EXPORT_SYMBOL_GPL(__irq_set_handler); From 56363e25f79fe83e63039c5595b8cd9814173d37 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:49:00 +0100 Subject: [PATCH 191/263] genirq/manage: Add buslock back in to __disable_irq_nosync() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: 1b7444446724 ("genirq/manage: Rework __disable_irq_nosync()") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-3-ckeepax@opensource.cirrus.com --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index c94837382037..7d68fb5dc242 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -659,7 +659,7 @@ void __disable_irq(struct irq_desc *desc) static int __disable_irq_nosync(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { __disable_irq(scoped_irqdesc); return 0; } From ef3330b99c01bda53f2a189b58bed8f6b7397f28 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 23 Oct 2025 16:49:01 +0100 Subject: [PATCH 192/263] genirq/manage: Add buslock back in to enable_irq() The locking was changed from a buslock to a plain lock, but the patch description states there was no functional change. Assuming this was accidental so reverting to using the buslock. Fixes: bddd10c55407 ("genirq/manage: Rework enable_irq()") Signed-off-by: Charles Keepax Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251023154901.1333755-4-ckeepax@opensource.cirrus.com --- kernel/irq/manage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 7d68fb5dc242..400856abf672 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -789,7 +789,7 @@ void __enable_irq(struct irq_desc *desc) */ void enable_irq(unsigned int irq) { - scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { struct irq_desc *desc = scoped_irqdesc; if (WARN(!desc->irq_data.chip, "enable_irq before setup/request_irq: irq %u\n", irq)) From 7f434e1d9a17ca5f567c9796c9c105a65c18db9a Mon Sep 17 00:00:00 2001 From: Hao Ge Date: Thu, 23 Oct 2025 22:33:13 +0800 Subject: [PATCH 193/263] slab: Fix obj_ext mistakenly considered NULL due to race condition If two competing threads enter alloc_slab_obj_exts(), and the one that allocates the vector wins the cmpxchg(), the other thread that failed allocation mistakenly assumes that slab->obj_exts is still empty due to its own allocation failure. This will then trigger warnings with CONFIG_MEM_ALLOC_PROFILING_DEBUG checks in the subsequent free path. Therefore, let's check the result of cmpxchg() to see if marking the allocation as failed was successful. If it wasn't, check whether the winning side has succeeded its allocation (it might have been also marking it as failed) and if yes, return success. Suggested-by: Harry Yoo Fixes: f7381b911640 ("slab: mark slab->obj_exts allocation failures unconditionally") Cc: Signed-off-by: Hao Ge Link: https://patch.msgid.link/20251023143313.1327968-1-hao.ge@linux.dev Reviewed-by: Suren Baghdasaryan Reviewed-by: Harry Yoo Signed-off-by: Vlastimil Babka --- mm/slub.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 87a1d2f9de0d..d4367f25b20d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2052,9 +2052,9 @@ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) } } -static inline void mark_failed_objexts_alloc(struct slab *slab) +static inline bool mark_failed_objexts_alloc(struct slab *slab) { - cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL); + return cmpxchg(&slab->obj_exts, 0, OBJEXTS_ALLOC_FAIL) == 0; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, @@ -2076,7 +2076,7 @@ static inline void handle_failed_objexts_alloc(unsigned long obj_exts, #else /* CONFIG_MEM_ALLOC_PROFILING_DEBUG */ static inline void mark_objexts_empty(struct slabobj_ext *obj_exts) {} -static inline void mark_failed_objexts_alloc(struct slab *slab) {} +static inline bool mark_failed_objexts_alloc(struct slab *slab) { return false; } static inline void handle_failed_objexts_alloc(unsigned long obj_exts, struct slabobj_ext *vec, unsigned int objects) {} @@ -2124,8 +2124,14 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, slab_nid(slab)); } if (!vec) { - /* Mark vectors which failed to allocate */ - mark_failed_objexts_alloc(slab); + /* + * Try to mark vectors which failed to allocate. + * If this operation fails, there may be a racing process + * that has already completed the allocation. + */ + if (!mark_failed_objexts_alloc(slab) && + slab_obj_exts(slab)) + return 0; return -ENOMEM; } From 7209ff310083315386570bf8d001a0845fe7ab8c Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 21 Oct 2025 14:41:01 +0200 Subject: [PATCH 194/263] of/irq: Export of_msi_xlate() for module usage of_msi_xlate() is required by drivers that can be configured as modular, export the symbol. Signed-off-by: Lorenzo Pieralisi Reviewed-by: Frank Li Cc: Rob Herring Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251021124103.198419-4-lpieralisi@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index ee7d5f0842e8..1cd93549d093 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -733,6 +733,7 @@ u32 of_msi_xlate(struct device *dev, struct device_node **msi_np, u32 id_in) } return id_out; } +EXPORT_SYMBOL_GPL(of_msi_xlate); /** * of_msi_map_get_device_domain - Use msi-map to find the relevant MSI domain From 09b0cd1297b4dbfe736aeaa0ceeab2265f47f772 Mon Sep 17 00:00:00 2001 From: Cen Zhang Date: Mon, 29 Sep 2025 05:30:17 +0000 Subject: [PATCH 195/263] Bluetooth: hci_sync: fix race in hci_cmd_sync_dequeue_once hci_cmd_sync_dequeue_once() does lookup and then cancel the entry under two separate lock sections. Meanwhile, hci_cmd_sync_work() can also delete the same entry, leading to double list_del() and "UAF". Fix this by holding cmd_sync_work_lock across both lookup and cancel, so that the entry cannot be removed concurrently. Fixes: 505ea2b29592 ("Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue") Reported-by: Cen Zhang Signed-off-by: Cen Zhang Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/hci_sync.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index eefdb6134ca5..d160e5e1fe8a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -863,11 +863,17 @@ bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, { struct hci_cmd_sync_work_entry *entry; - entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); - if (!entry) - return false; + mutex_lock(&hdev->cmd_sync_work_lock); - hci_cmd_sync_cancel_entry(hdev, entry); + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) { + mutex_unlock(&hdev->cmd_sync_work_lock); + return false; + } + + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + + mutex_unlock(&hdev->cmd_sync_work_lock); return true; } From f0c200a4a537f8f374584a974518b0ce69eda76c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 26 Sep 2025 11:48:50 -0400 Subject: [PATCH 196/263] Bluetooth: ISO: Fix BIS connection dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 9b263d061e05..954e1916506b 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2032,7 +2032,7 @@ static void iso_conn_ready(struct iso_conn *conn) */ if (!bacmp(&hcon->dst, BDADDR_ANY)) { bacpy(&hcon->dst, &iso_pi(parent)->dst); - hcon->dst_type = iso_pi(parent)->dst_type; + hcon->dst_type = le_addr_type(iso_pi(parent)->dst_type); } if (test_bit(HCI_CONN_PA_SYNC, &hcon->flags)) { From 77343b8b4f87560f8f03e77b98a81ff3a147b262 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 30 Sep 2025 13:39:33 +0800 Subject: [PATCH 197/263] Bluetooth: btmtksdio: Add pmctrl handling for BT closed state during reset This patch adds logic to handle power management control when the Bluetooth function is closed during the SDIO reset sequence. Specifically, if BT is closed before reset, the driver enables the SDIO function and sets driver pmctrl. After reset, if BT remains closed, the driver sets firmware pmctrl and disables the SDIO function. These changes ensure proper power management and device state consistency across the reset flow. Fixes: 8fafe702253d ("Bluetooth: mt7921s: support bluetooth reset mechanism") Signed-off-by: Chris Lu Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btmtksdio.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 50abefba6d04..62db31bd6592 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1270,6 +1270,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) sdio_claim_host(bdev->func); + /* set drv_pmctrl if BT is closed before doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + sdio_enable_func(bdev->func); + btmtksdio_drv_pmctrl(bdev); + } + sdio_writel(bdev->func, C_INT_EN_CLR, MTK_REG_CHLPCR, NULL); skb_queue_purge(&bdev->txq); cancel_work_sync(&bdev->txrx_work); @@ -1285,6 +1291,12 @@ static void btmtksdio_reset(struct hci_dev *hdev) goto err; } + /* set fw_pmctrl back if BT is closed after doing reset */ + if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) { + btmtksdio_fw_pmctrl(bdev); + sdio_disable_func(bdev->func); + } + clear_bit(BTMTKSDIO_PATCH_ENABLED, &bdev->tx_state); err: sdio_release_host(bdev->func); From 0d92808024b4e9868cef68d16f121d509843e80e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 1 Oct 2025 10:55:58 -0400 Subject: [PATCH 198/263] Bluetooth: HCI: Fix tracking of advertisement set/instance 0x00 This fixes the state tracking of advertisement set/instance 0x00 which is considered a legacy instance and is not tracked individually by adv_instances list, previously it was assumed that hci_dev itself would track it via HCI_LE_ADV but that is a global state not specifc to instance 0x00, so to fix it a new flag is introduced that only tracks the state of instance 0x00. Fixes: 1488af7b8b5f ("Bluetooth: hci_sync: Fix hci_resume_advertising_sync") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci.h | 1 + net/bluetooth/hci_event.c | 4 ++++ net/bluetooth/hci_sync.c | 5 ++--- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 9ecc70baaca9..8d0e703bc929 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -434,6 +434,7 @@ enum { HCI_USER_CHANNEL, HCI_EXT_CONFIGURED, HCI_LE_ADV, + HCI_LE_ADV_0, HCI_LE_PER_ADV, HCI_LE_SCAN, HCI_SSP_ENABLED, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d790b0d4eb9a..1dabf5a7ae18 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1609,6 +1609,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (adv && !adv->periodic) adv->enabled = true; + else if (!set->handle) + hci_dev_set_flag(hdev, HCI_LE_ADV_0); conn = hci_lookup_le_connect(hdev); if (conn) @@ -1619,6 +1621,8 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, if (cp->num_of_sets) { if (adv) adv->enabled = false; + else if (!set->handle) + hci_dev_clear_flag(hdev, HCI_LE_ADV_0); /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_ADV diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index d160e5e1fe8a..28ad08cd7d70 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2606,9 +2606,8 @@ static int hci_resume_advertising_sync(struct hci_dev *hdev) /* If current advertising instance is set to instance 0x00 * then we need to re-enable it. */ - if (!hdev->cur_adv_instance) - err = hci_enable_ext_advertising_sync(hdev, - hdev->cur_adv_instance); + if (hci_dev_test_and_clear_flag(hdev, HCI_LE_ADV_0)) + err = hci_enable_ext_advertising_sync(hdev, 0x00); } else { /* Schedule for most recent instance to be restarted and begin * the software rotation loop From e8785404de06a69d89dcdd1e9a0b6ea42dc6d327 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Fri, 3 Oct 2025 22:07:32 +0300 Subject: [PATCH 199/263] Bluetooth: MGMT: fix crash in set_mesh_sync and set_mesh_complete There is a BUG: KASAN: stack-out-of-bounds in set_mesh_sync due to memcpy from badly declared on-stack flexible array. Another crash is in set_mesh_complete() due to double list_del via mgmt_pending_valid + mgmt_pending_remove. Use DEFINE_FLEX to declare the flexible array right, and don't memcpy outside bounds. As mgmt_pending_valid removes the cmd from list, use mgmt_pending_free, and also report status on error. Fixes: 302a1f674c00d ("Bluetooth: MGMT: Fix possible UAFs") Signed-off-by: Pauli Virtanen Reviewed-by: Paul Menzel Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/mgmt.h | 2 +- net/bluetooth/mgmt.c | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 74edea06985b..bca0333f1e99 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -853,7 +853,7 @@ struct mgmt_cp_set_mesh { __le16 window; __le16 period; __u8 num_ad_types; - __u8 ad_types[]; + __u8 ad_types[] __counted_by(num_ad_types); } __packed; #define MGMT_SET_MESH_RECEIVER_SIZE 6 diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index a3d16eece0d2..24e335e3a727 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2175,19 +2175,24 @@ static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) sk = cmd->sk; if (status) { + mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + status); mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, true, cmd_status_rsp, &status); - return; + goto done; } - mgmt_pending_remove(cmd); mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); + +done: + mgmt_pending_free(cmd); } static int set_mesh_sync(struct hci_dev *hdev, void *data) { struct mgmt_pending_cmd *cmd = data; - struct mgmt_cp_set_mesh cp; + DEFINE_FLEX(struct mgmt_cp_set_mesh, cp, ad_types, num_ad_types, + sizeof(hdev->mesh_ad_types)); size_t len; mutex_lock(&hdev->mgmt_pending_lock); @@ -2197,27 +2202,26 @@ static int set_mesh_sync(struct hci_dev *hdev, void *data) return -ECANCELED; } - memcpy(&cp, cmd->param, sizeof(cp)); + len = cmd->param_len; + memcpy(cp, cmd->param, min(__struct_size(cp), len)); mutex_unlock(&hdev->mgmt_pending_lock); - len = cmd->param_len; - memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); - if (cp.enable) + if (cp->enable) hci_dev_set_flag(hdev, HCI_MESH); else hci_dev_clear_flag(hdev, HCI_MESH); - hdev->le_scan_interval = __le16_to_cpu(cp.period); - hdev->le_scan_window = __le16_to_cpu(cp.window); + hdev->le_scan_interval = __le16_to_cpu(cp->period); + hdev->le_scan_window = __le16_to_cpu(cp->window); - len -= sizeof(cp); + len -= sizeof(struct mgmt_cp_set_mesh); /* If filters don't fit, forward all adv pkts */ if (len <= sizeof(hdev->mesh_ad_types)) - memcpy(hdev->mesh_ad_types, cp.ad_types, len); + memcpy(hdev->mesh_ad_types, cp->ad_types, len); hci_update_passive_scan_sync(hdev); return 0; From 76e20da0bd00c556ed0a1e7250bdb6ac3e808ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Danis?= Date: Mon, 6 Oct 2025 10:35:44 +0200 Subject: [PATCH 200/263] Revert "Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit c9d84da18d1e0d28a7e16ca6df8e6d47570501d4. It replaces in L2CAP calls to msecs_to_jiffies() to secs_to_jiffies() and updates the constants accordingly. But the constants are also used in LCAP Configure Request and L2CAP Configure Response which expect values in milliseconds. This may prevent correct usage of L2CAP channel. To fix it, keep those constants in milliseconds and so revert this change. Fixes: c9d84da18d1e ("Bluetooth: L2CAP: convert timeouts to secs_to_jiffies()") Signed-off-by: Frédéric Danis Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 4 ++-- net/bluetooth/l2cap_core.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 4bb0eaedda18..00e182a22720 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -38,8 +38,8 @@ #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_EXT_WINDOW 0x3FFF #define L2CAP_DEFAULT_MAX_TX 3 -#define L2CAP_DEFAULT_RETRANS_TO 2 /* seconds */ -#define L2CAP_DEFAULT_MONITOR_TO 12 /* seconds */ +#define L2CAP_DEFAULT_RETRANS_TO 2000 /* 2 seconds */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 1492 /* Sized for AMP packet */ #define L2CAP_DEFAULT_ACK_TO 200 #define L2CAP_DEFAULT_MAX_SDU_SIZE 0xFFFF diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 805c752ac0a9..d08320380ad6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -282,7 +282,7 @@ static void __set_retrans_timer(struct l2cap_chan *chan) if (!delayed_work_pending(&chan->monitor_timer) && chan->retrans_timeout) { l2cap_set_timer(chan, &chan->retrans_timer, - secs_to_jiffies(chan->retrans_timeout)); + msecs_to_jiffies(chan->retrans_timeout)); } } @@ -291,7 +291,7 @@ static void __set_monitor_timer(struct l2cap_chan *chan) __clear_retrans_timer(chan); if (chan->monitor_timeout) { l2cap_set_timer(chan, &chan->monitor_timer, - secs_to_jiffies(chan->monitor_timeout)); + msecs_to_jiffies(chan->monitor_timeout)); } } From c403da5e98b04a2aec9cfb25cbeeb28d7ce29975 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 7 Oct 2025 13:29:15 -0400 Subject: [PATCH 201/263] Bluetooth: ISO: Fix another instance of dst_type handling Socket dst_type cannot be directly assigned to hci_conn->type since there domain is different which may lead to the wrong address type being used. Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency") Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/iso.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index 954e1916506b..3d98cb6291da 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -2046,7 +2046,13 @@ static void iso_conn_ready(struct iso_conn *conn) } bacpy(&iso_pi(sk)->dst, &hcon->dst); - iso_pi(sk)->dst_type = hcon->dst_type; + + /* Convert from HCI to three-value type */ + if (hcon->dst_type == ADDR_LE_DEV_PUBLIC) + iso_pi(sk)->dst_type = BDADDR_LE_PUBLIC; + else + iso_pi(sk)->dst_type = BDADDR_LE_RANDOM; + iso_pi(sk)->sync_handle = iso_pi(parent)->sync_handle; memcpy(iso_pi(sk)->base, iso_pi(parent)->base, iso_pi(parent)->base_len); iso_pi(sk)->base_len = iso_pi(parent)->base_len; From 057b6ca5961203f16a2a02fb0592661a7a959a84 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Thu, 16 Oct 2025 10:00:43 +0530 Subject: [PATCH 202/263] Bluetooth: btintel_pcie: Fix event packet loss issue In the current btintel_pcie driver implementation, when an interrupt is received, the driver checks for the alive cause before the TX/RX cause. Handling the alive cause involves resetting the TX/RX queue indices. This flow works correctly when the causes are mutually exclusive. However, if both cause bits are set simultaneously, the alive cause resets the queue indices, resulting in an event packet drop and a command timeout. To fix this issue, the driver is modified to handle all other causes before checking for the alive cause. Test case: Issue is seen with stress reboot scenario - 50x run [20.337589] Bluetooth: hci0: Device revision is 0 [20.346750] Bluetooth: hci0: Secure boot is enabled [20.346752] Bluetooth: hci0: OTP lock is disabled [20.346752] Bluetooth: hci0: API lock is enabled [20.346752] Bluetooth: hci0: Debug lock is disabled [20.346753] Bluetooth: hci0: Minimum firmware build 1 week 10 2014 [20.346754] Bluetooth: hci0: Bootloader timestamp 2023.43 buildtype 1 build 11631 [20.359070] Bluetooth: hci0: Found device firmware: intel/ibt-00a0-00a1-iml.sfi [20.371499] Bluetooth: hci0: Boot Address: 0xb02ff800 [20.385769] Bluetooth: hci0: Firmware Version: 166-34.25 [20.538257] Bluetooth: hci0: Waiting for firmware download to complete [20.554424] Bluetooth: hci0: Firmware loaded in 178651 usecs [21.081588] Bluetooth: hci0: Timeout (500 ms) on tx completion [21.096541] Bluetooth: hci0: Failed to send frame (-62) [21.110240] Bluetooth: hci0: sending frame failed (-62) [21.138551] Bluetooth: hci0: Failed to send Intel Reset command [21.170153] Bluetooth: hci0: Intel Soft Reset failed (-62) Signed-off-by: Kiran K Signed-off-by: Sai Teja Aluvala Reviewed-by: Paul Menzel Fixes: c2b636b3f788 ("Bluetooth: btintel_pcie: Add support for PCIe transport") Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel_pcie.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 6d3963bd56a9..a075d8ec4677 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1467,11 +1467,6 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP1) btintel_pcie_msix_gp1_handler(data); - /* This interrupt is triggered by the firmware after updating - * boot_stage register and image_response register - */ - if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) - btintel_pcie_msix_gp0_handler(data); /* For TX */ if (intr_fh & BTINTEL_PCIE_MSIX_FH_INT_CAUSES_0) { @@ -1487,6 +1482,12 @@ static irqreturn_t btintel_pcie_irq_msix_handler(int irq, void *dev_id) btintel_pcie_msix_tx_handle(data); } + /* This interrupt is triggered by the firmware after updating + * boot_stage register and image_response register + */ + if (intr_hw & BTINTEL_PCIE_MSIX_HW_INT_CAUSES_GP0) + btintel_pcie_msix_gp0_handler(data); + /* * Before sending the interrupt the HW disables it to prevent a nested * interrupt. This is done by writing 1 to the corresponding bit in From b489556a856d31f1eb73972150f371d2e4ce1de8 Mon Sep 17 00:00:00 2001 From: Calvin Owens Date: Thu, 23 Oct 2025 11:47:19 -0700 Subject: [PATCH 203/263] Bluetooth: fix corruption in h4_recv_buf() after cleanup A different structure is stored in drvdata for the drivers which used that duplicate function, but h4_recv_buf() assumes drvdata is always an hci_uart structure. Consequently, alignment and padding are now randomly corrupted for btmtkuart, btnxpuart, and bpa10x in h4_recv_buf(), causing erratic breakage. Fix this by making the hci_uart structure the explicit argument to h4_recv_buf(). Every caller already has a reference to hci_uart, and already obtains the hci_hdev reference through it, so this actually eliminates a redundant pointer indirection for all existing callers. Fixes: 93f06f8f0daf ("Bluetooth: remove duplicate h4_recv_buf() in header") Reported-by: Francesco Valla Closes: https://lore.kernel.org/lkml/6837167.ZASKD2KPVS@fedora.fritz.box/ Signed-off-by: Calvin Owens Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/bpa10x.c | 4 +++- drivers/bluetooth/btmtkuart.c | 4 +++- drivers/bluetooth/btnxpuart.c | 4 +++- drivers/bluetooth/hci_ag6xx.c | 2 +- drivers/bluetooth/hci_aml.c | 2 +- drivers/bluetooth/hci_ath.c | 2 +- drivers/bluetooth/hci_bcm.c | 2 +- drivers/bluetooth/hci_h4.c | 6 +++--- drivers/bluetooth/hci_intel.c | 2 +- drivers/bluetooth/hci_ll.c | 2 +- drivers/bluetooth/hci_mrvl.c | 6 +++--- drivers/bluetooth/hci_nokia.c | 4 ++-- drivers/bluetooth/hci_qca.c | 2 +- drivers/bluetooth/hci_uart.h | 2 +- 14 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index b7ba667a3d09..e305d04aac9d 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -41,6 +41,7 @@ struct bpa10x_data { struct usb_anchor rx_anchor; struct sk_buff *rx_skb[2]; + struct hci_uart hu; }; static void bpa10x_tx_complete(struct urb *urb) @@ -96,7 +97,7 @@ static void bpa10x_rx_complete(struct urb *urb) if (urb->status == 0) { bool idx = usb_pipebulk(urb->pipe); - data->rx_skb[idx] = h4_recv_buf(hdev, data->rx_skb[idx], + data->rx_skb[idx] = h4_recv_buf(&data->hu, data->rx_skb[idx], urb->transfer_buffer, urb->actual_length, bpa10x_recv_pkts, @@ -388,6 +389,7 @@ static int bpa10x_probe(struct usb_interface *intf, hci_set_drvdata(hdev, data); data->hdev = hdev; + data->hu.hdev = hdev; SET_HCIDEV_DEV(hdev, &intf->dev); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index d9b90ea2ad38..27aa48ff3ac2 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -79,6 +79,7 @@ struct btmtkuart_dev { u16 stp_dlen; const struct btmtkuart_data *data; + struct hci_uart hu; }; #define btmtkuart_is_standalone(bdev) \ @@ -368,7 +369,7 @@ static void btmtkuart_recv(struct hci_dev *hdev, const u8 *data, size_t count) sz_left -= adv; p_left += adv; - bdev->rx_skb = h4_recv_buf(bdev->hdev, bdev->rx_skb, p_h4, + bdev->rx_skb = h4_recv_buf(&bdev->hu, bdev->rx_skb, p_h4, sz_h4, mtk_recv_pkts, ARRAY_SIZE(mtk_recv_pkts)); if (IS_ERR(bdev->rx_skb)) { @@ -858,6 +859,7 @@ static int btmtkuart_probe(struct serdev_device *serdev) } bdev->hdev = hdev; + bdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, bdev); diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index d5153fed0518..3b1e9224e965 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -212,6 +212,7 @@ struct btnxpuart_dev { struct ps_data psdata; struct btnxpuart_data *nxp_data; struct reset_control *pdn; + struct hci_uart hu; }; #define NXP_V1_FW_REQ_PKT 0xa5 @@ -1756,7 +1757,7 @@ static size_t btnxpuart_receive_buf(struct serdev_device *serdev, ps_start_timer(nxpdev); - nxpdev->rx_skb = h4_recv_buf(nxpdev->hdev, nxpdev->rx_skb, data, count, + nxpdev->rx_skb = h4_recv_buf(&nxpdev->hu, nxpdev->rx_skb, data, count, nxp_recv_pkts, ARRAY_SIZE(nxp_recv_pkts)); if (IS_ERR(nxpdev->rx_skb)) { int err = PTR_ERR(nxpdev->rx_skb); @@ -1875,6 +1876,7 @@ static int nxp_serdev_probe(struct serdev_device *serdev) reset_control_deassert(nxpdev->pdn); nxpdev->hdev = hdev; + nxpdev->hu.hdev = hdev; hdev->bus = HCI_UART; hci_set_drvdata(hdev, nxpdev); diff --git a/drivers/bluetooth/hci_ag6xx.c b/drivers/bluetooth/hci_ag6xx.c index 2d40302409ff..94588676510f 100644 --- a/drivers/bluetooth/hci_ag6xx.c +++ b/drivers/bluetooth/hci_ag6xx.c @@ -105,7 +105,7 @@ static int ag6xx_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ag6xx->rx_skb = h4_recv_buf(hu->hdev, ag6xx->rx_skb, data, count, + ag6xx->rx_skb = h4_recv_buf(hu, ag6xx->rx_skb, data, count, ag6xx_recv_pkts, ARRAY_SIZE(ag6xx_recv_pkts)); if (IS_ERR(ag6xx->rx_skb)) { diff --git a/drivers/bluetooth/hci_aml.c b/drivers/bluetooth/hci_aml.c index 707e90f80130..b1f32c5a8a3f 100644 --- a/drivers/bluetooth/hci_aml.c +++ b/drivers/bluetooth/hci_aml.c @@ -650,7 +650,7 @@ static int aml_recv(struct hci_uart *hu, const void *data, int count) struct aml_data *aml_data = hu->priv; int err; - aml_data->rx_skb = h4_recv_buf(hu->hdev, aml_data->rx_skb, data, count, + aml_data->rx_skb = h4_recv_buf(hu, aml_data->rx_skb, data, count, aml_recv_pkts, ARRAY_SIZE(aml_recv_pkts)); if (IS_ERR(aml_data->rx_skb)) { diff --git a/drivers/bluetooth/hci_ath.c b/drivers/bluetooth/hci_ath.c index dbfe34664633..8d2b5e7f0d6a 100644 --- a/drivers/bluetooth/hci_ath.c +++ b/drivers/bluetooth/hci_ath.c @@ -191,7 +191,7 @@ static int ath_recv(struct hci_uart *hu, const void *data, int count) { struct ath_struct *ath = hu->priv; - ath->rx_skb = h4_recv_buf(hu->hdev, ath->rx_skb, data, count, + ath->rx_skb = h4_recv_buf(hu, ath->rx_skb, data, count, ath_recv_pkts, ARRAY_SIZE(ath_recv_pkts)); if (IS_ERR(ath->rx_skb)) { int err = PTR_ERR(ath->rx_skb); diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index f96617b85d87..fff845ed44e3 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -698,7 +698,7 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - bcm->rx_skb = h4_recv_buf(hu->hdev, bcm->rx_skb, data, count, + bcm->rx_skb = h4_recv_buf(hu, bcm->rx_skb, data, count, bcm_recv_pkts, ARRAY_SIZE(bcm_recv_pkts)); if (IS_ERR(bcm->rx_skb)) { int err = PTR_ERR(bcm->rx_skb); diff --git a/drivers/bluetooth/hci_h4.c b/drivers/bluetooth/hci_h4.c index 9070e31a68bf..ec017df8572c 100644 --- a/drivers/bluetooth/hci_h4.c +++ b/drivers/bluetooth/hci_h4.c @@ -112,7 +112,7 @@ static int h4_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - h4->rx_skb = h4_recv_buf(hu->hdev, h4->rx_skb, data, count, + h4->rx_skb = h4_recv_buf(hu, h4->rx_skb, data, count, h4_recv_pkts, ARRAY_SIZE(h4_recv_pkts)); if (IS_ERR(h4->rx_skb)) { int err = PTR_ERR(h4->rx_skb); @@ -151,12 +151,12 @@ int __exit h4_deinit(void) return hci_uart_unregister_proto(&h4p); } -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count) { - struct hci_uart *hu = hci_get_drvdata(hdev); u8 alignment = hu->alignment ? hu->alignment : 1; + struct hci_dev *hdev = hu->hdev; /* Check for error from previous call */ if (IS_ERR(skb)) diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 9b353c3d6442..1d6e09508f1f 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -972,7 +972,7 @@ static int intel_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - intel->rx_skb = h4_recv_buf(hu->hdev, intel->rx_skb, data, count, + intel->rx_skb = h4_recv_buf(hu, intel->rx_skb, data, count, intel_recv_pkts, ARRAY_SIZE(intel_recv_pkts)); if (IS_ERR(intel->rx_skb)) { diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c index 7044c86325ce..6f4e25917b86 100644 --- a/drivers/bluetooth/hci_ll.c +++ b/drivers/bluetooth/hci_ll.c @@ -429,7 +429,7 @@ static int ll_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - ll->rx_skb = h4_recv_buf(hu->hdev, ll->rx_skb, data, count, + ll->rx_skb = h4_recv_buf(hu, ll->rx_skb, data, count, ll_recv_pkts, ARRAY_SIZE(ll_recv_pkts)); if (IS_ERR(ll->rx_skb)) { int err = PTR_ERR(ll->rx_skb); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c index e08222395772..8767522ec4c6 100644 --- a/drivers/bluetooth/hci_mrvl.c +++ b/drivers/bluetooth/hci_mrvl.c @@ -264,9 +264,9 @@ static int mrvl_recv(struct hci_uart *hu, const void *data, int count) !test_bit(STATE_FW_LOADED, &mrvl->flags)) return count; - mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, - mrvl_recv_pkts, - ARRAY_SIZE(mrvl_recv_pkts)); + mrvl->rx_skb = h4_recv_buf(hu, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); if (IS_ERR(mrvl->rx_skb)) { int err = PTR_ERR(mrvl->rx_skb); bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index cd7575c20f65..1e65b541f8ad 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -624,8 +624,8 @@ static int nokia_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - btdev->rx_skb = h4_recv_buf(hu->hdev, btdev->rx_skb, data, count, - nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); + btdev->rx_skb = h4_recv_buf(hu, btdev->rx_skb, data, count, + nokia_recv_pkts, ARRAY_SIZE(nokia_recv_pkts)); if (IS_ERR(btdev->rx_skb)) { err = PTR_ERR(btdev->rx_skb); dev_err(dev, "Frame reassembly failed (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4cff4d9be313..888176b0faa9 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1277,7 +1277,7 @@ static int qca_recv(struct hci_uart *hu, const void *data, int count) if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) return -EUNATCH; - qca->rx_skb = h4_recv_buf(hu->hdev, qca->rx_skb, data, count, + qca->rx_skb = h4_recv_buf(hu, qca->rx_skb, data, count, qca_recv_pkts, ARRAY_SIZE(qca_recv_pkts)); if (IS_ERR(qca->rx_skb)) { int err = PTR_ERR(qca->rx_skb); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index cbbe79b241ce..48ac7ca9334e 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -162,7 +162,7 @@ struct h4_recv_pkt { int h4_init(void); int h4_deinit(void); -struct sk_buff *h4_recv_buf(struct hci_dev *hdev, struct sk_buff *skb, +struct sk_buff *h4_recv_buf(struct hci_uart *hu, struct sk_buff *skb, const unsigned char *buffer, int count, const struct h4_recv_pkt *pkts, int pkts_count); #endif From 857eb0fabc389be5159e0e17d84bc122614b5b98 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:29:41 -0400 Subject: [PATCH 204/263] Bluetooth: hci_conn: Fix connection cleanup with BIG with 2 or more BIS This fixes bis_cleanup not considering connections in BT_OPEN state before attempting to remove the BIG causing the following error: btproxy[20110]: < HCI Command: LE Terminate Broadcast Isochronous Group (0x08|0x006a) plen 2 BIG Handle: 0x01 Reason: Connection Terminated By Local Host (0x16) > HCI Event: Command Status (0x0f) plen 4 LE Terminate Broadcast Isochronous Group (0x08|0x006a) ncmd 1 Status: Unknown Advertising Identifier (0x42) Fixes: fa224d0c094a ("Bluetooth: ISO: Reassociate a socket with an active BIS") Signed-off-by: Luiz Augusto von Dentz Reviewed-by: Paul Menzel --- net/bluetooth/hci_conn.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 111f0e37b672..c5dedf39a129 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -843,6 +843,13 @@ static void bis_cleanup(struct hci_conn *conn) if (bis) return; + bis = hci_conn_hash_lookup_big_state(hdev, + conn->iso_qos.bcast.big, + BT_OPEN, + HCI_ROLE_MASTER); + if (bis) + return; + hci_le_terminate_big(hdev, conn); } else { hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, From 751463ceefc3397566d03c8b64ef4a77f5fd88ac Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 22 Oct 2025 16:03:19 -0400 Subject: [PATCH 205/263] Bluetooth: hci_core: Fix tracking of periodic advertisement Periodic advertising enabled flag cannot be tracked by the enabled flag since advertising and periodic advertising each can be enabled/disabled separately from one another causing the states to be inconsistent when for example an advertising set is disabled its enabled flag is set to false which is then used for periodic which has not being disabled. Fixes: eca0ae4aea66 ("Bluetooth: Add initial implementation of BIS connections") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_event.c | 7 +++++-- net/bluetooth/hci_sync.c | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 2924c2bf2a98..b8100dbfe5d7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -244,6 +244,7 @@ struct adv_info { bool enabled; bool pending; bool periodic; + bool periodic_enabled; __u8 mesh; __u8 instance; __u8 handle; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 1dabf5a7ae18..d37db364acf7 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1607,7 +1607,7 @@ static u8 hci_cc_le_set_ext_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_ADV); - if (adv && !adv->periodic) + if (adv) adv->enabled = true; else if (!set->handle) hci_dev_set_flag(hdev, HCI_LE_ADV_0); @@ -3963,8 +3963,11 @@ static u8 hci_cc_le_set_per_adv_enable(struct hci_dev *hdev, void *data, hci_dev_set_flag(hdev, HCI_LE_PER_ADV); if (adv) - adv->enabled = true; + adv->periodic_enabled = true; } else { + if (adv) + adv->periodic_enabled = false; + /* If just one instance was disabled check if there are * any other instance enabled before clearing HCI_LE_PER_ADV. * The current periodic adv instance will be marked as diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 28ad08cd7d70..73fc41b68b68 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1607,7 +1607,7 @@ int hci_disable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already disabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (!adv || !adv->periodic || !adv->enabled) + if (!adv || !adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); @@ -1672,7 +1672,7 @@ static int hci_enable_per_advertising_sync(struct hci_dev *hdev, u8 instance) /* If periodic advertising already enabled there is nothing to do. */ adv = hci_find_adv_instance(hdev, instance); - if (adv && adv->periodic && adv->enabled) + if (adv && adv->periodic_enabled) return 0; memset(&cp, 0, sizeof(cp)); From 91d35ec9b3956d6b3cf789c1593467e58855b03a Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 23 Oct 2025 14:05:30 +0200 Subject: [PATCH 206/263] Bluetooth: rfcomm: fix modem control handling The RFCOMM driver confuses the local and remote modem control signals, which specifically means that the reported DTR and RTS state will instead reflect the remote end (i.e. DSR and CTS). This issue dates back to the original driver (and a follow-on update) merged in 2002, which resulted in a non-standard implementation of TIOCMSET that allowed controlling also the TS07.10 IC and DV signals by mapping them to the RI and DCD input flags, while TIOCMGET failed to return the actual state of DTR and RTS. Note that the bogus control of input signals in tiocmset() is just dead code as those flags will have been masked out by the tty layer since 2003. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Luiz Augusto von Dentz --- net/bluetooth/rfcomm/tty.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index 376ce6de84be..b783526ab588 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -643,8 +643,8 @@ static void rfcomm_dev_modem_status(struct rfcomm_dlc *dlc, u8 v24_sig) tty_port_tty_hangup(&dev->port, true); dev->modem_status = - ((v24_sig & RFCOMM_V24_RTC) ? (TIOCM_DSR | TIOCM_DTR) : 0) | - ((v24_sig & RFCOMM_V24_RTR) ? (TIOCM_RTS | TIOCM_CTS) : 0) | + ((v24_sig & RFCOMM_V24_RTC) ? TIOCM_DSR : 0) | + ((v24_sig & RFCOMM_V24_RTR) ? TIOCM_CTS : 0) | ((v24_sig & RFCOMM_V24_IC) ? TIOCM_RI : 0) | ((v24_sig & RFCOMM_V24_DV) ? TIOCM_CD : 0); } @@ -1055,10 +1055,14 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) static int rfcomm_tty_tiocmget(struct tty_struct *tty) { struct rfcomm_dev *dev = tty->driver_data; + struct rfcomm_dlc *dlc = dev->dlc; + u8 v24_sig; BT_DBG("tty %p dev %p", tty, dev); - return dev->modem_status; + rfcomm_dlc_get_modem_status(dlc, &v24_sig); + + return (v24_sig & (TIOCM_DTR | TIOCM_RTS)) | dev->modem_status; } static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) @@ -1071,23 +1075,15 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigne rfcomm_dlc_get_modem_status(dlc, &v24_sig); - if (set & TIOCM_DSR || set & TIOCM_DTR) + if (set & TIOCM_DTR) v24_sig |= RFCOMM_V24_RTC; - if (set & TIOCM_RTS || set & TIOCM_CTS) + if (set & TIOCM_RTS) v24_sig |= RFCOMM_V24_RTR; - if (set & TIOCM_RI) - v24_sig |= RFCOMM_V24_IC; - if (set & TIOCM_CD) - v24_sig |= RFCOMM_V24_DV; - if (clear & TIOCM_DSR || clear & TIOCM_DTR) + if (clear & TIOCM_DTR) v24_sig &= ~RFCOMM_V24_RTC; - if (clear & TIOCM_RTS || clear & TIOCM_CTS) + if (clear & TIOCM_RTS) v24_sig &= ~RFCOMM_V24_RTR; - if (clear & TIOCM_RI) - v24_sig &= ~RFCOMM_V24_IC; - if (clear & TIOCM_CD) - v24_sig &= ~RFCOMM_V24_DV; rfcomm_dlc_set_modem_status(dlc, v24_sig); From 84dfce65a7ae7b11c7b13285a1b23e9a94ad37b7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 24 Oct 2025 14:59:59 +0200 Subject: [PATCH 207/263] x86/bugs: Remove dead code which might prevent from building Clang, in particular, is not happy about dead code: arch/x86/kernel/cpu/bugs.c:1830:20: error: unused function 'match_option' [-Werror,-Wunused-function] 1830 | static inline bool match_option(const char *arg, int arglen, const char *opt) | ^~~~~~~~~~~~ 1 error generated. Remove a leftover from the previous cleanup. Fixes: 02ac6cc8c5a1 ("x86/bugs: Simplify SSB cmdline parsing") Signed-off-by: Andy Shevchenko Signed-off-by: Dave Hansen Link: https://patch.msgid.link/20251024125959.1526277-1-andriy.shevchenko%40linux.intel.com --- arch/x86/kernel/cpu/bugs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e08de5b0d20b..d7fa03bf51b4 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1827,13 +1827,6 @@ void unpriv_ebpf_notify(int new_state) } #endif -static inline bool match_option(const char *arg, int arglen, const char *opt) -{ - int len = strlen(opt); - - return len == arglen && !strncmp(arg, opt, len); -} - /* The kernel command line selection for spectre v2 */ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_NONE, From b2a578f3127ab9ef80114cef9b20a2b42a8ee77a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Oct 2025 16:08:24 +0200 Subject: [PATCH 208/263] soc: officially expand maintainership team Since Olof moved on from the soc tree maintenance, Arnd has mainly taken care of the day-to-day activities around the SoC tree by himself, which is generally not a good setup. Krzysztof, Linus and Alexandre have volunteered to become co-maintainers of the SoC tree, with the plan of taking turns to do merges and reviews to spread the workload. In addition, Drew joins as another reviewer. Acked-by: Krzysztof Kozlowski Acked-by: Alexandre Belloni Acked-by: Linus Walleij Acked-by: Drew Fustini Signed-off-by: Arnd Bergmann --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46126ce2f968..bb627c2fb438 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1997,6 +1997,10 @@ F: include/uapi/linux/if_arcnet.h ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS) M: Arnd Bergmann +M: Krzysztof Kozlowski +M: Alexandre Belloni +M: Linus Walleij +R: Drew Fustini L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: soc@lists.linux.dev S: Maintained From 73ba88fb04081372a69f0395958ac6b65d53d134 Mon Sep 17 00:00:00 2001 From: Nirbhay Sharma Date: Sat, 25 Oct 2025 02:02:19 +0530 Subject: [PATCH 209/263] firewire: init_ohci1394_dma: add missing function parameter documentation Add missing kernel-doc parameter descriptions for five functions in init_ohci1394_dma.c to fix documentation warnings when building with W=1. This patch addresses the following warnings: - init_ohci1394_wait_for_busresets: missing @ohci description - init_ohci1394_enable_physical_dma: missing @ohci description - init_ohci1394_reset_and_init_dma: missing @ohci description - init_ohci1394_controller: missing @num, @slot, @func descriptions - setup_ohci1394_dma: missing @opt description Tested with GCC 13.2.0 and W=1 flag. All documentation warnings for these functions have been resolved. Signed-off-by: Nirbhay Sharma Link: https://lore.kernel.org/r/20251024203219.101990-2-nirbhay.lkd@gmail.com Signed-off-by: Takashi Sakamoto --- drivers/firewire/init_ohci1394_dma.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/firewire/init_ohci1394_dma.c b/drivers/firewire/init_ohci1394_dma.c index 48b879e9e831..121f0c2f6401 100644 --- a/drivers/firewire/init_ohci1394_dma.c +++ b/drivers/firewire/init_ohci1394_dma.c @@ -167,6 +167,7 @@ static inline void __init init_ohci1394_initialize(struct ohci *ohci) /** * init_ohci1394_wait_for_busresets - wait until bus resets are completed + * @ohci: Pointer to the OHCI-1394 controller structure * * OHCI1394 initialization itself and any device going on- or offline * and any cable issue cause a IEEE1394 bus reset. The OHCI1394 spec @@ -189,6 +190,8 @@ static inline void __init init_ohci1394_wait_for_busresets(struct ohci *ohci) /** * init_ohci1394_enable_physical_dma - Enable physical DMA for remote debugging + * @ohci: Pointer to the OHCI-1394 controller structure + * * This enables remote DMA access over IEEE1394 from every host for the low * 4GB of address space. DMA accesses above 4GB are not available currently. */ @@ -201,6 +204,8 @@ static inline void __init init_ohci1394_enable_physical_dma(struct ohci *ohci) /** * init_ohci1394_reset_and_init_dma - init controller and enable DMA + * @ohci: Pointer to the OHCI-1394 controller structure + * * This initializes the given controller and enables physical DMA engine in it. */ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) @@ -230,6 +235,10 @@ static inline void __init init_ohci1394_reset_and_init_dma(struct ohci *ohci) /** * init_ohci1394_controller - Map the registers of the controller and init DMA + * @num: PCI bus number + * @slot: PCI device number + * @func: PCI function number + * * This maps the registers of the specified controller and initializes it */ static inline void __init init_ohci1394_controller(int num, int slot, int func) @@ -284,6 +293,7 @@ void __init init_ohci1394_dma_on_all_controllers(void) /** * setup_ohci1394_dma - enables early OHCI1394 DMA initialization + * @opt: Kernel command line parameter string */ static int __init setup_ohci1394_dma(char *opt) { From 53abe3e1c154628cc74e33a1bfcd865656e433a5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 15 Oct 2025 11:19:34 +0200 Subject: [PATCH 210/263] sched: Remove never used code in mm_cid_get() Clang is not happy with set but unused variable (this is visible with `make W=1` build: kernel/sched/sched.h:3744:18: error: variable 'cpumask' set but not used [-Werror,-Wunused-but-set-variable] It seems like the variable was never used along with the assignment that does not have side effects as far as I can see. Remove those altogether. Fixes: 223baf9d17f2 ("sched: Fix performance regression introduced by mm_cid") Signed-off-by: Andy Shevchenko Tested-by: Eric Biggers Reviewed-by: Breno Leitao Signed-off-by: Linus Torvalds --- kernel/sched/sched.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1f5d07067f60..361f9101cef9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3740,11 +3740,9 @@ static inline int mm_cid_get(struct rq *rq, struct task_struct *t, struct mm_struct *mm) { struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; - struct cpumask *cpumask; int cid; lockdep_assert_rq_held(rq); - cpumask = mm_cidmask(mm); cid = __this_cpu_read(pcpu_cid->cid); if (mm_cid_is_valid(cid)) { mm_cid_snapshot_time(rq, mm); From 84a905290cb4c3d9a71a9e3b2f2e02e031e7512f Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Thu, 23 Oct 2025 16:48:53 +0200 Subject: [PATCH 211/263] net: phy: dp83867: Disable EEE support as not implemented While the DP83867 PHYs report EEE capability through their feature registers, the actual hardware does not support EEE (see Links). When the connected MAC enables EEE, it causes link instability and communication failures. The issue is reproducible with a iMX8MP and relevant stmmac ethernet port. Since the introduction of phylink-managed EEE support in the stmmac driver, EEE is now enabled by default, leading to issues on systems using the DP83867 PHY. Call phy_disable_eee during phy initialization to prevent EEE from being enabled on DP83867 PHYs. Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/1445244/dp83867ir-dp83867-disable-eee-lpi Link: https://e2e.ti.com/support/interface-group/interface/f/interface-forum/658638/dp83867ir-eee-energy-efficient-ethernet Fixes: 2a10154abcb7 ("net: phy: dp83867: Add TI dp83867 phy") Cc: stable@vger.kernel.org Signed-off-by: Emanuele Ghidoli Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20251023144857.529566-1-ghidoliemanuele@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83867.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index deeefb962566..36a0c1b7f59c 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -738,6 +738,12 @@ static int dp83867_config_init(struct phy_device *phydev) return ret; } + /* Although the DP83867 reports EEE capability through the + * MDIO_PCS_EEE_ABLE and MDIO_AN_EEE_ADV registers, the feature + * is not actually implemented in hardware. + */ + phy_disable_eee(phydev); + if (phy_interface_is_rgmii(phydev) || phydev->interface == PHY_INTERFACE_MODE_SGMII) { val = phy_read(phydev, MII_DP83867_PHYCTRL); From dcb6fa37fd7bc9c3d2b066329b0d27dedf8becaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Oct 2025 15:59:49 -0700 Subject: [PATCH 212/263] Linux 6.18-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d14824792227..b34a1f4c0396 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* From 6f40e50ceb99fc8ef37e5c56e2ec1d162733fef0 Mon Sep 17 00:00:00 2001 From: Qianchang Zhao Date: Wed, 22 Oct 2025 15:27:47 +0900 Subject: [PATCH 213/263] ksmbd: transport_ipc: validate payload size before reading handle handle_response() dereferences the payload as a 4-byte handle without verifying that the declared payload size is at least 4 bytes. A malformed or truncated message from ksmbd.mountd can lead to a 4-byte read past the declared payload size. Validate the size before dereferencing. This is a minimal fix to guard the initial handle read. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org Reported-by: Qianchang Zhao Signed-off-by: Qianchang Zhao Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_ipc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c index 46f87fd1ce1c..2c08cccfa680 100644 --- a/fs/smb/server/transport_ipc.c +++ b/fs/smb/server/transport_ipc.c @@ -263,10 +263,16 @@ static void ipc_msg_handle_free(int handle) static int handle_response(int type, void *payload, size_t sz) { - unsigned int handle = *(unsigned int *)payload; + unsigned int handle; struct ipc_msg_table_entry *entry; int ret = 0; + /* Prevent 4-byte read beyond declared payload size */ + if (sz < sizeof(unsigned int)) + return -EINVAL; + + handle = *(unsigned int *)payload; + ipc_update_last_active(); down_read(&ipc_msg_table_lock); hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) { From 975f05a7647720b6a82dac73463eaeca3067de71 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 16:07:13 +0200 Subject: [PATCH 214/263] smb: server: call smb_direct_post_recv_credits() when the negotiation is done We now activate sc->recv_io.posted.refill_work and sc->idle.immediate_work only after a successful negotiation, before sending the negotiation response. It means the queue_work(sc->workqueue, &sc->recv_io.posted.refill_work) in put_recvmsg() of the negotiate request, is a no-op now. It also means our explicit smb_direct_post_recv_credits() will have queue_work(sc->workqueue, &sc->idle.immediate_work) as no-op. This should make sure we don't have races and post any immediate data_transfer message that tries to grant credits to the peer, before we send the negotiation response, as that will grant the initial credits to the peer. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Fixes: 1cde0a74a7a8 ("smb: server: don't use delayed_work for post_recv_credits_work") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 36 ++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 89b02efdba0c..e70fc447e815 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -418,9 +418,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id) sc->ib.dev = sc->rdma.cm_id->device; - INIT_WORK(&sc->recv_io.posted.refill_work, - smb_direct_post_recv_credits); - INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer); conn = ksmbd_conn_alloc(); @@ -1904,7 +1901,6 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) goto out_err; } - smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); return 0; out_err: put_recvmsg(sc, recvmsg); @@ -2249,8 +2245,8 @@ static int smb_direct_prepare(struct ksmbd_transport *t) return -ECONNABORTED; ret = smb_direct_check_recvmsg(recvmsg); - if (ret == -ECONNABORTED) - goto out; + if (ret) + goto put; req = (struct smbdirect_negotiate_req *)recvmsg->packet; sp->max_recv_size = min_t(int, sp->max_recv_size, @@ -2265,14 +2261,38 @@ static int smb_direct_prepare(struct ksmbd_transport *t) sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1); - ret = smb_direct_send_negotiate_response(sc, ret); -out: +put: spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.queue_length--; list_del(&recvmsg->list); spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); put_recvmsg(sc, recvmsg); + if (ret == -ECONNABORTED) + return ret; + + if (ret) + goto respond; + + /* + * We negotiated with success, so we need to refill the recv queue. + * We do that with sc->idle.immediate_work still being disabled + * via smbdirect_socket_init(), so that queue_work(sc->workqueue, + * &sc->idle.immediate_work) in smb_direct_post_recv_credits() + * is a no-op. + * + * The message that grants the credits to the client is + * the negotiate response. + */ + INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits); + smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work); + if (unlikely(sc->first_error)) + return sc->first_error; + INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work); + +respond: + ret = smb_direct_send_negotiate_response(sc, ret); + return ret; } From f574069c5c55ebe642f899a01c8f127d845fd562 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Mon, 20 Oct 2025 16:07:53 +0200 Subject: [PATCH 215/263] smb: server: let smb_direct_cm_handler() call ib_drain_qp() after smb_direct_disconnect_rdma_work() All handlers triggered by ib_drain_qp() should already see the broken connection. smb_direct_cm_handler() is called under a mutex of the rdma_cm, we should make sure ib_drain_qp() and all rdma layer logic completes and unlocks the mutex. It means free_transport() will also already see the connection as SMBDIRECT_SOCKET_DISCONNECTED, so we need to call crdma_[un]lock_handler(sc->rdma.cm_id) around ib_drain_qp(), rdma_destroy_qp(), ib_free_cq() and ib_dealloc_pd(). Otherwise we free resources while the ib_drain_qp() within smb_direct_cm_handler() is still running. We have to unlock before rdma_destroy_id() as it locks again. Fixes: 141fa9824c0f ("ksmbd: call ib_drain_qp when disconnected") Fixes: 4c564f03e23b ("smb: server: make use of common smbdirect_socket") Cc: Namjae Jeon Cc: Steve French Cc: Tom Talpey Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index e70fc447e815..7d86553fcc7c 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -466,6 +466,9 @@ static void free_transport(struct smb_direct_transport *t) disable_delayed_work_sync(&sc->idle.timer_work); disable_work_sync(&sc->idle.immediate_work); + if (sc->rdma.cm_id) + rdma_lock_handler(sc->rdma.cm_id); + if (sc->ib.qp) { ib_drain_qp(sc->ib.qp); sc->ib.qp = NULL; @@ -494,8 +497,10 @@ static void free_transport(struct smb_direct_transport *t) ib_free_cq(sc->ib.recv_cq); if (sc->ib.pd) ib_dealloc_pd(sc->ib.pd); - if (sc->rdma.cm_id) + if (sc->rdma.cm_id) { + rdma_unlock_handler(sc->rdma.cm_id); rdma_destroy_id(sc->rdma.cm_id); + } smb_direct_destroy_pools(sc); ksmbd_conn_free(KSMBD_TRANS(t)->conn); @@ -1724,10 +1729,10 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id, } case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: { - ib_drain_qp(sc->ib.qp); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; smb_direct_disconnect_rdma_work(&sc->disconnect_work); + if (sc->ib.qp) + ib_drain_qp(sc->ib.qp); break; } case RDMA_CM_EVENT_CONNECT_ERROR: { From 65f9c4c5888913c2cf5d2fc9454c83f9930d537d Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 15:24:38 +0200 Subject: [PATCH 216/263] tools: ynl: fix string attribute length to include null terminator The ynl_attr_put_str() function was not including the null terminator in the attribute length calculation. This caused kernel to reject CTRL_CMD_GETFAMILY requests with EINVAL: "Attribute failed policy validation". For a 4-character family name like "dpll": - Sent: nla_len=8 (4 byte header + 4 byte string without null) - Expected: nla_len=9 (4 byte header + 5 byte string with null) The bug was introduced in commit 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") when refactoring from stpcpy() to strlen(). The original code correctly included the null terminator: end = stpcpy(ynl_attr_data(attr), str); attr->nla_len = NLA_HDRLEN + NLA_ALIGN(end - (char *)ynl_attr_data(attr)); Since stpcpy() returns a pointer past the null terminator, the length included it. The refactored version using strlen() omitted the +1. The fix also removes NLA_ALIGN() from nla_len calculation, since nla_len should contain actual attribute length, not aligned length. Alignment is only for calculating next attribute position. This makes the code consistent with ynl_attr_put(). CTRL_ATTR_FAMILY_NAME uses NLA_NUL_STRING policy which requires null terminator. Kernel validates with memchr() and rejects if not found. Fixes: 15d2540e0d62 ("tools: ynl: check for overflow of constructed messages") Signed-off-by: Petr Oros Tested-by: Ivan Vecera Reviewed-by: Ivan Vecera Link: https://lore.kernel.org/20251018151737.365485-3-zahari.doychev@linux.com Link: https://patch.msgid.link/20251024132438.351290-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 29481989ea76..ced7dce44efb 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -313,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) struct nlattr *attr; size_t len; - len = strlen(str); + len = strlen(str) + 1; if (__ynl_attr_put_overflow(nlh, len)) return; @@ -321,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str) attr->nla_type = attr_type; strcpy((char *)ynl_attr_data(attr), str); - attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len); + attr->nla_len = NLA_HDRLEN + len; nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len); } From e3a0ca09acbe697245f944ee92b956db58a0ed09 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Thu, 23 Oct 2025 16:24:06 +0700 Subject: [PATCH 217/263] MAINTAINERS: mark ISDN subsystem as orphan We have not heard any activities from Karsten in years: - Last review tag was nine years ago in commit a921e9bd4e22a7 ("isdn: i4l: move active-isdn drivers to staging") - Last message on lore was in October 2020 [1]. Furthermore, messages to isdn mailing list bounce. Mark the subsystem as orphan to reflect these. [1]: https://lore.kernel.org/all/0ee243a9-9937-ad26-0684-44b18e772662@linux-pingi.de/ Signed-off-by: Bagas Sanjaya Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251023092406.56699-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- CREDITS | 4 ++++ MAINTAINERS | 8 ++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/CREDITS b/CREDITS index 903ea238e64f..fa5397f4ebcd 100644 --- a/CREDITS +++ b/CREDITS @@ -2036,6 +2036,10 @@ S: Botanicka' 68a S: 602 00 Brno S: Czech Republic +N: Karsten Keil +E: isdn@linux-pingi.de +D: ISDN subsystem maintainer + N: Jakob Kemi E: jakob.kemi@telia.com D: V4L W9966 Webcam driver diff --git a/MAINTAINERS b/MAINTAINERS index bccada21ef41..cca911b2de4e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13247,10 +13247,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending.git mast F: drivers/infiniband/ulp/isert ISDN/CMTP OVER BLUETOOTH -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Odd Fixes +S: Orphan W: http://www.isdn4linux.de F: Documentation/isdn/ F: drivers/isdn/capi/ @@ -13259,10 +13257,8 @@ F: include/uapi/linux/isdn/ F: net/bluetooth/cmtp/ ISDN/mISDN SUBSYSTEM -M: Karsten Keil -L: isdn4linux@listserv.isdn4linux.de (subscribers-only) L: netdev@vger.kernel.org -S: Maintained +S: Orphan W: http://www.isdn4linux.de F: drivers/isdn/Kconfig F: drivers/isdn/Makefile From 03ca7c8c42be913529eb9f188278114430c6abbd Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Thu, 23 Oct 2025 21:13:37 +0800 Subject: [PATCH 218/263] net: hns3: return error code when function fails Currently, in hclge_mii_ioctl(), the operation to read the PHY register (SIOCGMIIREG) always returns 0. This patch changes the return type of hclge_read_phy_reg(), returning an error code when the function fails. Fixes: 024712f51e57 ("net: hns3: add ioctl support for imp-controlled PHYs") Signed-off-by: Jijie Shao Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/20251023131338.2642520-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 9 ++++++--- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 9d34d28ff168..782bb48c9f3d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9429,8 +9429,7 @@ static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd) /* this command reads phy id and register at the same time */ fallthrough; case SIOCGMIIREG: - data->val_out = hclge_read_phy_reg(hdev, data->reg_num); - return 0; + return hclge_read_phy_reg(hdev, data->reg_num, &data->val_out); case SIOCSMIIREG: return hclge_write_phy_reg(hdev, data->reg_num, data->val_in); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 96553109f44c..cf881108fa57 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -274,7 +274,7 @@ void hclge_mac_stop_phy(struct hclge_dev *hdev) phy_stop(phydev); } -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val) { struct hclge_phy_reg_cmd *req; struct hclge_desc desc; @@ -286,11 +286,14 @@ u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr) req->reg_addr = cpu_to_le16(reg_addr); ret = hclge_cmd_send(&hdev->hw, &desc, 1); - if (ret) + if (ret) { dev_err(&hdev->pdev->dev, "failed to read phy reg, ret = %d.\n", ret); + return ret; + } - return le16_to_cpu(req->reg_val); + *val = le16_to_cpu(req->reg_val); + return 0; } int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h index 4200d0b6d931..21d434c82475 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h @@ -13,7 +13,7 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle); void hclge_mac_disconnect_phy(struct hnae3_handle *handle); void hclge_mac_start_phy(struct hclge_dev *hdev); void hclge_mac_stop_phy(struct hclge_dev *hdev); -u16 hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr); +int hclge_read_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 *val); int hclge_write_phy_reg(struct hclge_dev *hdev, u16 reg_addr, u16 val); #endif From 46a499aaf8c27476fd05e800f3e947bfd71aa724 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Thu, 23 Oct 2025 19:48:42 +0530 Subject: [PATCH 219/263] sfc: fix potential memory leak in efx_mae_process_mport() In efx_mae_enumerate_mports(), memory allocated for mae_mport_desc is passed as a argument to efx_mae_process_mport(), but when the error path in efx_mae_process_mport() gets executed, the memory allocated for desc gets leaked. Fix that by freeing the memory allocation before returning error. Fixes: a6a15aca4207 ("sfc: enumerate mports in ef100") Acked-by: Edward Cree Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251023141844.25847-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/mae.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 6fd0c1e9a7d5..7cfd9000f79d 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -1090,6 +1090,9 @@ void efx_mae_remove_mport(void *desc, void *arg) kfree(mport); } +/* + * Takes ownership of @desc, even if it returns an error + */ static int efx_mae_process_mport(struct efx_nic *efx, struct mae_mport_desc *desc) { @@ -1100,6 +1103,7 @@ static int efx_mae_process_mport(struct efx_nic *efx, if (!IS_ERR_OR_NULL(mport)) { netif_err(efx, drv, efx->net_dev, "mport with id %u does exist!!!\n", desc->mport_id); + kfree(desc); return -EEXIST; } From e3966940559d52aa1800a008dcfeec218dd31f88 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 24 Oct 2025 12:58:53 +0000 Subject: [PATCH 220/263] tools: ynl: avoid print_field when there is no reply When request a none support device operation, there will be no reply. In this case, the len(desc) check will always be true, causing print_field to enter an infinite loop and crash the program. Example reproducer: # ethtool.py -c veth0 To fix this, return immediately if there is no reply. Fixes: f3d07b02b2b8 ("tools: ynl: ethtool testing tool") Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20251024125853.102916-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/net/ynl/pyynl/ethtool.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index 9b523cbb3568..fd0f6b8d54d1 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -44,6 +44,9 @@ def print_field(reply, *desc): Pretty-print a set of fields from the reply. desc specifies the fields and the optional type (bool/yn). """ + if not reply: + return + if len(desc) == 0: return print_field(reply, *zip(reply.keys(), reply.keys())) From 520ad9e96937e825a117e9f00dd35a3e199d67b5 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 20:55:12 +0200 Subject: [PATCH 221/263] dpll: spec: add missing module-name and clock-id to pin-get reply The dpll.yaml spec incorrectly omitted module-name and clock-id from the pin-get operation reply specification, even though the kernel DPLL implementation has always included these attributes in pin-get responses since the initial implementation. This spec inconsistency caused issues with the C YNL code generator. The generated dpll_pin_get_rsp structure was missing these fields. Fix the spec by adding module-name and clock-id to the pin-attrs reply specification to match the actual kernel behavior. Fixes: 3badff3a25d8 ("dpll: spec: Add Netlink spec in YAML") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024185512.363376-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index cafb4ec20447..80728f6f9bc8 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -605,6 +605,8 @@ operations: reply: &pin-attrs attributes: - id + - module-name + - clock-id - board-label - panel-label - package-label From 210b35d6a7ea415494ce75490c4b43b4e717d935 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 26 Oct 2025 11:17:42 +0100 Subject: [PATCH 222/263] dt-bindings: net: sparx5: Narrow properly LAN969x register space windows Commit 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") said that LAN969x has exactly two address spaces ("reg" property) but implemented it as 2 or more. Narrow the constraint to properly express that only two items are allowed, which also matches Linux driver. Fixes: 267bca002c50 ("dt-bindings: net: sparx5: correct LAN969x register space windows") Signed-off-by: Krzysztof Kozlowski Acked-by: Rob Herring (Arm) Link: https://patch.msgid.link/20251026101741.20507-2-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/microchip,sparx5-switch.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index 5caa3779660d..5491d0775ede 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -180,9 +180,9 @@ allOf: then: properties: reg: - minItems: 2 + maxItems: 2 reg-names: - minItems: 2 + maxItems: 2 else: properties: reg: From e9840461317e1bf0628b164de54632754d5f6a44 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Fri, 21 Feb 2025 10:39:49 +0100 Subject: [PATCH 223/263] ice: fix lane number calculation E82X adapters do not have sequential IDs, lane number is PF ID. Add check for ICE_MAC_GENERIC and skip checking port options. Also, adjust logical port number for specific E825 device with external PHY support (PCI device id 0x579F). For this particular device, with 2x25G (PHY0) and 2x10G (PHY1) port configuration, modification of pf_id -> lane_number mapping is required. PF IDs on the 2nd PHY start from 4 in such scenario. Otherwise, the lane number cannot be determined correctly, leading to PTP init errors during PF initialization. Fixes: 258f5f9058159 ("ice: Add correct PHY lane assignment") Co-developed-by: Karol Kolacinski Signed-off-by: Karol Kolacinski Signed-off-by: Grzegorz Nitka Reviewed-by: Przemek Kitszel Reviewed-by: Milena Olech Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2250426ec91b..28d74bf56ffc 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -4382,6 +4382,15 @@ int ice_get_phy_lane_number(struct ice_hw *hw) unsigned int lane; int err; + /* E82X does not have sequential IDs, lane number is PF ID. + * For E825 device, the exception is the variant with external + * PHY (0x579F), in which there is also 1:1 pf_id -> lane_number + * mapping. + */ + if (hw->mac_type == ICE_MAC_GENERIC || + hw->device_id == ICE_DEV_ID_E825C_SGMII) + return hw->pf_id; + options = kcalloc(ICE_AQC_PORT_OPT_MAX, sizeof(*options), GFP_KERNEL); if (!options) return -ENOMEM; From 45076413063cf5e0e25fd3f7f89fc90338b161c8 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Mon, 29 Sep 2025 17:29:05 +0200 Subject: [PATCH 224/263] ice: fix destination CGU for dual complex E825 On dual complex E825, only complex 0 has functional CGU (Clock Generation Unit), powering all the PHYs. SBQ (Side Band Queue) destination device 'cgu' in current implementation points to CGU on current complex and, in order to access primary CGU from the secondary complex, the driver should use 'cgu_peer' as a destination device in read/write CGU registers operations. Define new 'cgu_peer' (15) as RDA (Remote Device Access) client over SB-IOSF interface and use it as device target when accessing CGU from secondary complex. This problem has been identified when working on recovery clock enablement [1]. In existing implementation for E825 devices, only PF0, which is clock owner, is involved in CGU configuration, thus the problem was not exposed to the user. [1] https://lore.kernel.org/intel-wired-lan/20250905150947.871566-1-grzegorz.nitka@intel.com/ Fixes: e2193f9f9ec9 ("ice: enable timesync operation on 2xNAC E825 devices") Signed-off-by: Grzegorz Nitka Reviewed-by: Arkadiusz Kubalewski Reviewed-by: Aleksandr Loktionov Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 26 ++++++++++++++++++-- drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 28d74bf56ffc..2532b6f82e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6505,6 +6505,28 @@ u32 ice_get_link_speed(u16 index) return ice_aq_to_link_speed[index]; } +/** + * ice_get_dest_cgu - get destination CGU dev for given HW + * @hw: pointer to the HW struct + * + * Get CGU client id for CGU register read/write operations. + * + * Return: CGU device id to use in SBQ transactions. + */ +static enum ice_sbq_dev_id ice_get_dest_cgu(struct ice_hw *hw) +{ + /* On dual complex E825 only complex 0 has functional CGU powering all + * the PHYs. + * SBQ destination device cgu points to CGU on a current complex and to + * access primary CGU from the secondary complex, the driver should use + * cgu_peer as a destination device. + */ + if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) && + !ice_is_primary(hw)) + return ice_sbq_dev_cgu_peer; + return ice_sbq_dev_cgu; +} + /** * ice_read_cgu_reg - Read a CGU register * @hw: Pointer to the HW struct @@ -6519,8 +6541,8 @@ u32 ice_get_link_speed(u16 index) int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_rd, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr }; int err; @@ -6551,8 +6573,8 @@ int ice_read_cgu_reg(struct ice_hw *hw, u32 addr, u32 *val) int ice_write_cgu_reg(struct ice_hw *hw, u32 addr, u32 val) { struct ice_sbq_msg_input cgu_msg = { + .dest_dev = ice_get_dest_cgu(hw), .opcode = ice_sbq_msg_wr, - .dest_dev = ice_sbq_dev_cgu, .msg_addr_low = addr, .data = val }; diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h index 183dd5457d6a..21bb861febbf 100644 --- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h @@ -50,6 +50,7 @@ enum ice_sbq_dev_id { ice_sbq_dev_phy_0 = 0x02, ice_sbq_dev_cgu = 0x06, ice_sbq_dev_phy_0_peer = 0x0D, + ice_sbq_dev_cgu_peer = 0x0F, }; enum ice_sbq_msg_opcode { From 9a0f81fc64b2ba80ce768cd6e680c0f440723464 Mon Sep 17 00:00:00 2001 From: Grzegorz Nitka Date: Wed, 8 Oct 2025 12:28:53 +0200 Subject: [PATCH 225/263] ice: fix usage of logical PF id In some devices, the function numbers used are non-contiguous. For example, here is such configuration for E825 device: root@/home/root# lspci -v | grep Eth 0a:00.0 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.1 Ethernet controller: Intel Corporation Ethernet Connection E825-C for backplane (rev 04) 0a:00.4 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) 0a:00.5 Ethernet controller: Intel Corporation Ethernet Connection E825-C 10GbE (rev 04) When distributing RSS and FDIR masks, which are global resources across the active devices, it is required to have a contiguous PF id, which can be described as a logical PF id. In the case above, function 0 would have a logical PF id of 0, function 1 would have a logical PF id of 1, and functions 4 and 5 would have a logical PF ids 2 and 3 respectively. Using logical PF id can properly describe which slice of resources can be used by a particular PF. The 'function id' to 'logical id' mapping has been introduced with the commit 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration"). However, the usage of 'logical_pf_id' field was unintentionally skipped for profile mask configuration. Fix it by using 'logical_pf_id' instead of 'pf_id' value when configuring masks. Without that patch, wrong indexes, i.e. out of range for given PF, can be used while configuring resources masks, which might lead to memory corruption and undefined driver behavior. The call trace below is one of the examples of such error: [ +0.000008] WARNING: CPU: 39 PID: 3830 at drivers/base/devres.c:1095 devm_kfree+0x70/0xa0 [ +0.000002] RIP: 0010:devm_kfree+0x70/0xa0 [ +0.000001] Call Trace: [ +0.000002] [ +0.000002] ice_free_hw_tbls+0x183/0x710 [ice] [ +0.000106] ice_deinit_hw+0x67/0x90 [ice] [ +0.000091] ice_deinit+0x20d/0x2f0 [ice] [ +0.000076] ice_remove+0x1fa/0x6a0 [ice] [ +0.000075] pci_device_remove+0xa7/0x1d0 [ +0.000010] device_release_driver_internal+0x365/0x530 [ +0.000006] driver_detach+0xbb/0x170 [ +0.000003] bus_remove_driver+0x117/0x290 [ +0.000007] pci_unregister_driver+0x26/0x250 Fixes: 015307754a19 ("ice: Support VF queue rate limit and quanta size configuration") Suggested-by: Dan Nowlin Signed-off-by: Grzegorz Nitka Reviewed-by: Aleksandr Loktionov Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 363ae79a3620..013c93b6605e 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1479,7 +1479,7 @@ static void ice_init_prof_masks(struct ice_hw *hw, enum ice_block blk) per_pf = ICE_PROF_MASK_COUNT / hw->dev_caps.num_funcs; hw->blk[blk].masks.count = per_pf; - hw->blk[blk].masks.first = hw->pf_id * per_pf; + hw->blk[blk].masks.first = hw->logical_pf_id * per_pf; memset(hw->blk[blk].masks.masks, 0, sizeof(hw->blk[blk].masks.masks)); From 85308d999c4b4162a742c9ec5ef954226c3b48d9 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 1 Sep 2025 05:33:11 +0900 Subject: [PATCH 226/263] ixgbe: fix memory leak and use-after-free in ixgbe_recovery_probe() The error path of ixgbe_recovery_probe() has two memory bugs. For non-E610 adapters, the function jumps to clean_up_probe without calling devlink_free(), leaking the devlink instance and its embedded adapter structure. For E610 adapters, devlink_free() is called at shutdown_aci, but clean_up_probe then accesses adapter->state, sometimes triggering use-after-free because adapter is embedded in devlink. This UAF is similar to the one recently reported in ixgbe_remove(). (Link) Fix both issues by moving devlink_free() after adapter->state access, aligning with the cleanup order in ixgbe_probe(). Link: https://lore.kernel.org/intel-wired-lan/20250828020558.1450422-1-den@valinux.co.jp/ Fixes: 29cb3b8d95c7 ("ixgbe: add E610 implementation of FW recovery mode") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Reviewed-by: Jedrzej Jagielski Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ca1ccc630001..3190ce7e44c7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -11507,10 +11507,10 @@ static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter) shutdown_aci: mutex_destroy(&adapter->hw.aci.lock); ixgbe_release_hw_control(adapter); - devlink_free(adapter->devlink); clean_up_probe: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); + devlink_free(adapter->devlink); pci_release_mem_regions(pdev); if (disable_dev) pci_disable_device(pdev); From 81fb1fe75c672db905b54f4ab744552121099a24 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Sat, 20 Sep 2025 15:39:18 +0900 Subject: [PATCH 227/263] igc: power up the PHY before the link test The current implementation of the igc driver doesn't power up the PHY before the link test in igc_ethtool_diag_test(), causing the link test to always report FAIL when admin state is down and the PHY is consequently powered down. To test the link state regardless of admin state, power up the PHY before the link test in the offline test path. After the link test, the original PHY state is restored by igc_reset(), so additional code which explicitly restores the original state is not necessary. Note that this change is applied only for the offline test path. This is because in the online path we shouldn't interrupt normal networking operation and powering up the PHY and restoring the original state would interrupt that. This implementation also uses igc_power_up_phy_copper() without checking the media type, since igc devices are currently only copper devices and the function is called in other places without checking the media type. Furthermore, the powering up is on a best-effort basis, that is, we don't handle failures of powering up (e.g. bus error) and just let the test report FAIL. Tested on Intel Corporation Ethernet Controller I226-V (rev 04) with cable connected and link available. Set device down and do ethtool test. # ip link set dev enp0s5 down Without patch: # ethtool --test enp0s5 The test result is FAIL The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 1 With patch: # ethtool --test enp0s5 The test result is PASS The test extra info: Register test (offline) 0 Eeprom test (offline) 0 Interrupt test (offline) 0 Loopback test (offline) 0 Link test (on/offline) 0 Fixes: f026d8ca2904 ("igc: add support to eeprom, registers and link self-tests") Signed-off-by: Kohei Enju Reviewed-by: Vitaly Lifshits Tested-by: Avigail Dahan Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index f3e7218ba6f3..ca93629b1d3a 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -2094,6 +2094,9 @@ static void igc_ethtool_diag_test(struct net_device *netdev, netdev_info(adapter->netdev, "Offline testing starting"); set_bit(__IGC_TESTING, &adapter->state); + /* power up PHY for link test */ + igc_power_up_phy_copper(&adapter->hw); + /* Link test performed before hardware reset so autoneg doesn't * interfere with test result */ From bc73c5885c606f5e48dd4222eba0361fa0f146ca Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:21 +0900 Subject: [PATCH 228/263] igb: use EOPNOTSUPP instead of ENOTSUPP in igb_get_sset_count() igb_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 9d5c824399de ("igb: PCI-Express 82575 Gigabit Ethernet driver") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f8a208c84f15..10e2445e0ded 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2281,7 +2281,7 @@ static int igb_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGB_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From 21d08d1c4c29f9795fbc678011c85f72931e22c1 Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:22 +0900 Subject: [PATCH 229/263] igc: use EOPNOTSUPP instead of ENOTSUPP in igc_ethtool_get_sset_count() igc_ethtool_get_sset_count() returns -ENOTSUPP when a given stringset is not supported, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 36b9fea60961 ("igc: Add support for statistics") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ca93629b1d3a..bb783042d1af 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -810,7 +810,7 @@ static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset) case ETH_SS_PRIV_FLAGS: return IGC_PRIV_FLAGS_STR_LEN; default: - return -ENOTSUPP; + return -EOPNOTSUPP; } } From f82acf6fb42115c87d3809968a2e0ab2fedba15b Mon Sep 17 00:00:00 2001 From: Kohei Enju Date: Mon, 6 Oct 2025 21:35:23 +0900 Subject: [PATCH 230/263] ixgbe: use EOPNOTSUPP instead of ENOTSUPP in ixgbe_ptp_feature_enable() When the requested PTP feature is not supported, ixgbe_ptp_feature_enable() returns -ENOTSUPP, causing userland programs to get "Unknown error 524". Since EOPNOTSUPP should be used when error is propagated to userland, return -EOPNOTSUPP instead of -ENOTSUPP. Fixes: 3a6a4edaa592 ("ixgbe: Hardware Timestamping + PTP Hardware Clock (PHC)") Signed-off-by: Kohei Enju Reviewed-by: Aleksandr Loktionov Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 114dd88fc71c..6885d2343c48 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -641,7 +641,7 @@ static int ixgbe_ptp_feature_enable(struct ptp_clock_info *ptp, * disabled */ if (rq->type != PTP_CLK_REQ_PPS || !adapter->ptp_setup_sdp) - return -ENOTSUPP; + return -EOPNOTSUPP; if (on) adapter->flags2 |= IXGBE_FLAG2_PTP_PPS_ENABLED; From 36fedc44e37e811f25666c600bce4bc027290226 Mon Sep 17 00:00:00 2001 From: Petr Oros Date: Fri, 24 Oct 2025 21:07:33 +0200 Subject: [PATCH 231/263] dpll: fix device-id-get and pin-id-get to return errors properly The device-id-get and pin-id-get handlers were ignoring errors from the find functions and sending empty replies instead of returning error codes to userspace. When dpll_device_find_from_nlattr() or dpll_pin_find_from_nlattr() returned an error (e.g., -EINVAL for "multiple matches" or -ENODEV for "not found"), the handlers checked `if (!IS_ERR(ptr))` and skipped adding the device/pin handle to the message, but then still sent the empty message as a successful reply. This caused userspace tools to receive empty responses with id=0 instead of proper netlink errors with extack messages like "multiple matches". The bug is visible via strace, which shows the kernel sending TWO netlink messages in response to a single request: 1. Empty reply (20 bytes, just header, no attributes): recvfrom(3, [{nlmsg_len=20, nlmsg_type=dpll, nlmsg_flags=0, ...}, {cmd=0x7, version=1}], ...) 2. NLMSG_ERROR ACK with extack (because of NLM_F_ACK flag): recvfrom(3, [{nlmsg_len=60, nlmsg_type=NLMSG_ERROR, nlmsg_flags=NLM_F_CAPPED|NLM_F_ACK_TLVS, ...}, [{error=0, msg={...}}, [{nla_type=NLMSGERR_ATTR_MSG}, "multiple matches"]]], ...) The C YNL library parses the first message, sees an empty response, and creates a result object with calloc() which zero-initializes all fields, resulting in id=0. The Python YNL library parses both messages and displays the extack from the second NLMSG_ERROR message. Fix by checking `if (IS_ERR(ptr))` first and returning the error code immediately, so that netlink properly sends only NLMSG_ERROR with the extack message to userspace. After this fix, both C and Python YNL tools receive only the NLMSG_ERROR and behave consistently. This affects: - DPLL_CMD_DEVICE_ID_GET: now properly returns error when multiple devices match the criteria (e.g., same module-name + clock-id) - DPLL_CMD_PIN_ID_GET: now properly returns error when multiple pins match the criteria (e.g., same module-name) Before fix: $ dpll pin id-get module-name ice 0 (wrong - should be error, there are 17 pins with module-name "ice") After fix: $ dpll pin id-get module-name ice Error: multiple matches (correct - kernel reports the ambiguity via extack) Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions") Signed-off-by: Petr Oros Reviewed-by: Ivan Vecera Link: https://patch.msgid.link/20251024190733.364101-1-poros@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/dpll_netlink.c | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 74c1f0ca95f2..a4153bcb6dcf 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -1559,16 +1559,18 @@ int dpll_nl_pin_id_get_doit(struct sk_buff *skb, struct genl_info *info) return -EMSGSIZE; } pin = dpll_pin_find_from_nlattr(info); - if (!IS_ERR(pin)) { - if (!dpll_pin_available(pin)) { - nlmsg_free(msg); - return -ENODEV; - } - ret = dpll_msg_add_pin_handle(msg, pin); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(pin)) { + nlmsg_free(msg); + return PTR_ERR(pin); + } + if (!dpll_pin_available(pin)) { + nlmsg_free(msg); + return -ENODEV; + } + ret = dpll_msg_add_pin_handle(msg, pin); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); @@ -1735,12 +1737,14 @@ int dpll_nl_device_id_get_doit(struct sk_buff *skb, struct genl_info *info) } dpll = dpll_device_find_from_nlattr(info); - if (!IS_ERR(dpll)) { - ret = dpll_msg_add_dev_handle(msg, dpll); - if (ret) { - nlmsg_free(msg); - return ret; - } + if (IS_ERR(dpll)) { + nlmsg_free(msg); + return PTR_ERR(dpll); + } + ret = dpll_msg_add_dev_handle(msg, dpll); + if (ret) { + nlmsg_free(msg); + return ret; } genlmsg_end(msg, hdr); From d8d2b1f81530988abe2e2bfaceec1c5d30b9a0b4 Mon Sep 17 00:00:00 2001 From: Pavel Zhigulin Date: Fri, 24 Oct 2025 19:13:02 +0300 Subject: [PATCH 232/263] net: cxgb4/ch_ipsec: fix potential use-after-free in ch_ipsec_xfrm_add_state() callback In ch_ipsec_xfrm_add_state() there is not check of try_module_get return value. It is very unlikely, but try_module_get() could return false value, which could cause use-after-free error. Conditions: The module count must be zero, and a module unload in progress. The thread doing the unload is blocked somewhere. Another thread makes a callback into the module for some request that (for instance) would need to create a kernel thread. It tries to get a reference for the thread. So try_module_get(THIS_MODULE) is the right call - and will fail here. This fix adds checking the result of try_module_get call Fixes: 6dad4e8ab3ec ("chcr: Add support for Inline IPSec") Signed-off-by: Pavel Zhigulin Link: https://patch.msgid.link/20251024161304.724436-1-Pavel.Zhigulin@kaspersky.com Signed-off-by: Jakub Kicinski --- .../ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c index ecd9a0bd5e18..49b57bb5fac1 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ipsec/chcr_ipsec.c @@ -290,9 +290,15 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, return -EINVAL; } + if (unlikely(!try_module_get(THIS_MODULE))) { + NL_SET_ERR_MSG_MOD(extack, "Failed to acquire module reference"); + return -ENODEV; + } + sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); if (!sa_entry) { res = -ENOMEM; + module_put(THIS_MODULE); goto out; } @@ -301,7 +307,6 @@ static int ch_ipsec_xfrm_add_state(struct net_device *dev, sa_entry->esn = 1; ch_ipsec_setkey(x, sa_entry); x->xso.offload_handle = (unsigned long)sa_entry; - try_module_get(THIS_MODULE); out: return res; } From 40c17a02de41f12dd713309c7d2546117c577d29 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Mon, 27 Oct 2025 15:09:12 +0100 Subject: [PATCH 233/263] dpll: zl3073x: Fix output pin registration Currently, the signal format of an associated output is not considered during output pin registration. As a result, the driver registers output pins that are disabled by the signal format configuration. Fix this by calling zl3073x_output_pin_is_enabled() to check whether a given output pin should be registered or not. Fixes: 75a71ecc2412 ("dpll: zl3073x: Register DPLL devices and pins") Signed-off-by: Ivan Vecera Link: https://patch.msgid.link/20251027140912.233152-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- drivers/dpll/zl3073x/dpll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c index 93dc93eec79e..f93f9a458324 100644 --- a/drivers/dpll/zl3073x/dpll.c +++ b/drivers/dpll/zl3073x/dpll.c @@ -1904,7 +1904,7 @@ zl3073x_dpll_pin_is_registrable(struct zl3073x_dpll *zldpll, } is_diff = zl3073x_out_is_diff(zldev, out); - is_enabled = zl3073x_out_is_enabled(zldev, out); + is_enabled = zl3073x_output_pin_is_enabled(zldev, index); } /* Skip N-pin if the corresponding input/output is differential */ From 12d2303db892d397373e2af40758cbd97309ec37 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:40 +0800 Subject: [PATCH 234/263] net: hibmcge: fix rx buf avl irq is not re-enabled in irq_handle issue irq initialized with the macro HBG_ERR_IRQ_I will automatically be re-enabled, whereas those initialized with the macro HBG_IRQ_I will not be re-enabled. Since the rx buf avl irq is initialized using the macro HBG_IRQ_I, it needs to be actively re-enabled; otherwise priv->stats.rx_fifo_less_empty_thrsld_cnt cannot be correctly incremented. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20251025014642.265259-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c index 8af0bc4cca21..ae4cb35186d8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_irq.c @@ -32,6 +32,7 @@ static void hbg_irq_handle_rx_buf_val(struct hbg_priv *priv, const struct hbg_irq_info *irq_info) { priv->stats.rx_fifo_less_empty_thrsld_cnt++; + hbg_hw_irq_enable(priv, irq_info->mask, true); } #define HBG_IRQ_I(name, handle) \ From 71eb8d1e07562b43bebc0c2721699814b43fd83d Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:41 +0800 Subject: [PATCH 235/263] net: hibmcge: remove unnecessary check for np_link_fail in scenarios without phy. hibmcge driver uses fixed_phy to configure scenarios without PHY, where the driver is always in a linked state. However, there might be no link in hardware, so the np_link error is detected in hbg_hw_adjust_link(), which can cause abnormal logs. Therefore, in scenarios without a PHY, the driver no longer checks the np_link status. Fixes: 1d7cd7a9c69c ("net: hibmcge: support scenario without PHY") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h | 1 + drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c | 3 +++ drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 1 - 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index ea09a09c451b..2097e4c2b3d7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -17,6 +17,7 @@ #define HBG_PCU_CACHE_LINE_SIZE 32 #define HBG_TX_TIMEOUT_BUF_LEN 1024 #define HBG_RX_DESCR 0x01 +#define HBG_NO_PHY 0xFF #define HBG_PACKET_HEAD_SIZE ((HBG_RX_SKIP1 + HBG_RX_SKIP2 + \ HBG_RX_DESCR) * HBG_PCU_CACHE_LINE_SIZE) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index d0aa0661ecd4..d6e8ce8e351a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -244,6 +244,9 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) hbg_hw_mac_enable(priv, HBG_STATUS_ENABLE); + if (priv->mac.phy_addr == HBG_NO_PHY) + return; + /* wait MAC link up */ ret = readl_poll_timeout(priv->io_base + HBG_REG_AN_NEG_STATE_ADDR, link_status, diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 37791de47f6f..b6f0a2780ea8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -20,7 +20,6 @@ #define HBG_MDIO_OP_INTERVAL_US (5 * 1000) #define HBG_NP_LINK_FAIL_RETRY_TIMES 5 -#define HBG_NO_PHY 0xFF static void hbg_mdio_set_command(struct hbg_mac *mac, u32 cmd) { From 7e2958aee59ceb30ef153387741d12385b712250 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Sat, 25 Oct 2025 09:46:42 +0800 Subject: [PATCH 236/263] net: hibmcge: fix the inappropriate netif_device_detach() current, driver will call netif_device_detach() in pci_error_handlers.error_detected() and do reset in pci_error_handlers.slot_reset(). However, if pci_error_handlers.slot_reset() is not called after pci_error_handlers.error_detected(), driver will be detached and unable to recover. drivers/pci/pcie/err.c/report_error_detected() says: If any device in the subtree does not have an error_detected callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent error callbacks of any device in the subtree, and will exit in the disconnected error state. Therefore, when the hibmcge device and other devices that do not support the error_detected callback are under the same subtree, hibmcge will be unable to do slot_reset even for non-fatal errors. This path move netif_device_detach() from error_detected() to slot_reset(), ensuring that detach and reset are always executed together. Fixes: fd394a334b1c ("net: hibmcge: Add support for abnormal irq handling feature") Signed-off-by: Jijie Shao Link: https://patch.msgid.link/20251025014642.265259-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 83cf75bf7a17..e11495b7ee98 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -136,12 +136,11 @@ static pci_ers_result_t hbg_pci_err_detected(struct pci_dev *pdev, { struct net_device *netdev = pci_get_drvdata(pdev); - netif_device_detach(netdev); - - if (state == pci_channel_io_perm_failure) + if (state == pci_channel_io_perm_failure) { + netif_device_detach(netdev); return PCI_ERS_RESULT_DISCONNECT; + } - pci_disable_device(pdev); return PCI_ERS_RESULT_NEED_RESET; } @@ -150,6 +149,9 @@ static pci_ers_result_t hbg_pci_err_slot_reset(struct pci_dev *pdev) struct net_device *netdev = pci_get_drvdata(pdev); struct hbg_priv *priv = netdev_priv(netdev); + netif_device_detach(netdev); + pci_disable_device(pdev); + if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "failed to re-enable PCI device after reset\n"); From 514f1dc8f2ca3101e04cdf452e53baca3a76e544 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 22 Oct 2025 17:18:10 +0200 Subject: [PATCH 237/263] netfilter: nft_ct: enable labels for get case too conntrack labels can only be set when the conntrack has been created with the "ctlabel" extension. For older iptables (connlabel match), adding an "-m connlabel" rule turns on the ctlabel extension allocation for all future conntrack entries. For nftables, its only enabled for 'ct label set foo', but not for 'ct label foo' (i.e. check). But users could have a ruleset that only checks for presence, and rely on userspace to set a label bit via ctnetlink infrastructure. This doesn't work without adding a dummy 'ct label set' rule. We could also enable extension infra for the first (failing) ctnetlink request, but unlike ruleset we would not be able to disable the extension again. Therefore turn on ctlabel extension allocation if an nftables ruleset checks for a connlabel too. Fixes: 1ad8f48df6f6 ("netfilter: nftables: add connlabel set support") Reported-by: Antonio Ojea Closes: https://lore.kernel.org/netfilter-devel/aPi_VdZpVjWujZ29@strlen.de/ Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d526e69a2a2b..a418eb3d612b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -379,6 +379,14 @@ static bool nft_ct_tmpl_alloc_pcpu(void) } #endif +static void __nft_ct_get_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) +{ +#ifdef CONFIG_NF_CONNTRACK_LABELS + if (priv->key == NFT_CT_LABELS) + nf_connlabels_put(ctx->net); +#endif +} + static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -413,6 +421,10 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; len = NF_CT_LABELS_MAX_SIZE; + + err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); + if (err) + return err; break; #endif case NFT_CT_HELPER: @@ -494,7 +506,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case IP_CT_DIR_REPLY: break; default: - return -EINVAL; + err = -EINVAL; + goto err; } } @@ -502,11 +515,11 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); if (err < 0) - return err; + goto err; err = nf_ct_netns_get(ctx->net, ctx->family); if (err < 0) - return err; + goto err; if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS || @@ -514,6 +527,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, nf_ct_set_acct(ctx->net, true); return 0; +err: + __nft_ct_get_destroy(ctx, priv); + return err; } static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) @@ -626,6 +642,9 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, static void nft_ct_get_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { + struct nft_ct *priv = nft_expr_priv(expr); + + __nft_ct_get_destroy(ctx, priv); nf_ct_netns_put(ctx->net, ctx->family); } From 8d96dfdcabef00e28f0c851b1502adb679dfc6d9 Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Fri, 24 Oct 2025 17:54:39 +0200 Subject: [PATCH 238/263] netfilter: nft_connlimit: fix possible data race on connection count nft_connlimit_eval() reads priv->list->count to check if the connection limit has been exceeded. This value is being read without a lock and can be modified by a different process. Use READ_ONCE() for correctness. Fixes: df4a90250976 ("netfilter: nf_conncount: merge lookup and add functions") Signed-off-by: Fernando Fernandez Mancera Signed-off-by: Florian Westphal --- net/netfilter/nft_connlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 92b984fa8175..fc35a11cdca2 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -48,7 +48,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, return; } - count = priv->list->count; + count = READ_ONCE(priv->list->count); if ((count > priv->limit) ^ priv->invert) { regs->verdict.code = NFT_BREAK; From 90918e3b6404c2a37837b8f11692471b4c512de2 Mon Sep 17 00:00:00 2001 From: Andrii Melnychenko Date: Fri, 24 Oct 2025 18:22:16 +0200 Subject: [PATCH 239/263] netfilter: nft_ct: add seqadj extension for natted connections Sequence adjustment may be required for FTP traffic with PASV/EPSV modes. due to need to re-write packet payload (IP, port) on the ftp control connection. This can require changes to the TCP length and expected seq / ack_seq. The easiest way to reproduce this issue is with PASV mode. Example ruleset: table inet ftp_nat { ct helper ftp_helper { type "ftp" protocol tcp l3proto inet } chain prerouting { type filter hook prerouting priority 0; policy accept; tcp dport 21 ct state new ct helper set "ftp_helper" } } table ip nat { chain prerouting { type nat hook prerouting priority -100; policy accept; tcp dport 21 dnat ip prefix to ip daddr map { 192.168.100.1 : 192.168.13.2/32 } } chain postrouting { type nat hook postrouting priority 100 ; policy accept; tcp sport 21 snat ip prefix to ip saddr map { 192.168.13.2 : 192.168.100.1/32 } } } Note that the ftp helper gets assigned *after* the dnat setup. The inverse (nat after helper assign) is handled by an existing check in nf_nat_setup_info() and will not show the problem. Topoloy: +-------------------+ +----------------------------------+ | FTP: 192.168.13.2 | <-> | NAT: 192.168.13.3, 192.168.100.1 | +-------------------+ +----------------------------------+ | +-----------------------+ | Client: 192.168.100.2 | +-----------------------+ ftp nat changes do not work as expected in this case: Connected to 192.168.100.1. [..] ftp> epsv EPSV/EPRT on IPv4 off. ftp> ls 227 Entering passive mode (192,168,100,1,209,129). 421 Service not available, remote server has closed connection. Kernel logs: Missing nfct_seqadj_ext_add() setup call WARNING: CPU: 1 PID: 0 at net/netfilter/nf_conntrack_seqadj.c:41 [..] __nf_nat_mangle_tcp_packet+0x100/0x160 [nf_nat] nf_nat_ftp+0x142/0x280 [nf_nat_ftp] help+0x4d1/0x880 [nf_conntrack_ftp] nf_confirm+0x122/0x2e0 [nf_conntrack] nf_hook_slow+0x3c/0xb0 .. Fix this by adding the required extension when a conntrack helper is assigned to a connection that has a nat binding. Fixes: 1a64edf54f55 ("netfilter: nft_ct: add helper set support") Signed-off-by: Andrii Melnychenko Signed-off-by: Florian Westphal --- net/netfilter/nft_ct.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index a418eb3d612b..6f2ae7cad731 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -22,6 +22,7 @@ #include #include #include +#include struct nft_ct_helper_obj { struct nf_conntrack_helper *helper4; @@ -1192,6 +1193,10 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj, if (help) { rcu_assign_pointer(help->helper, to_assign); set_bit(IPS_HELPER_BIT, &ct->status); + + if ((ct->status & IPS_NAT_MASK) && !nfct_seqadj(ct)) + if (!nfct_seqadj_ext_add(ct)) + regs->verdict.code = NF_DROP; } } From a6f0459aadf1b41a9b9fae02006b1db024d60856 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 12:57:59 +0100 Subject: [PATCH 240/263] mptcp: fix subflow rcvbuf adjust The mptcp PM can add subflow to the conn_list before tcp_init_transfer(). Calling tcp_rcvbuf_grow() on such subflow is not correct as later init will overwrite the update. Fix the issue calling tcp_rcvbuf_grow() only after init buffer initialization. Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Signed-off-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-1-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 0292162a14ee..a8a3bdf95543 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2051,6 +2051,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) msk->rcvq_space.space = msk->rcvq_space.copied; if (mptcp_rcvbuf_grow(sk)) { + int copied = msk->rcvq_space.copied; /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to @@ -2063,8 +2064,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); - tcp_sk(ssk)->rcvq_space.space = msk->rcvq_space.copied; - tcp_rcvbuf_grow(ssk); + /* subflows can be added before tcp_init_transfer() */ + if (tcp_sk(ssk)->rcvq_space.space) { + tcp_sk(ssk)->rcvq_space.space = copied; + tcp_rcvbuf_grow(ssk); + } unlock_sock_fast(ssk, slow); } } From 24990d89c23de4dbef6b0b3d58383cafefdd6983 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:00 +0100 Subject: [PATCH 241/263] trace: tcp: add three metrics to trace_tcp_rcvbuf_grow() While chasing yet another receive autotuning bug, I found useful to add rcv_ssthresh, window_clamp and rcv_wnd. tcp_stream 40597 [068] 2172.978198: tcp:tcp_rcvbuf_grow: time=50307 rtt_us=50179 copied=77824 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=110592 tcp_stream 40597 [068] 2173.028528: tcp:tcp_rcvbuf_grow: time=50336 rtt_us=50206 copied=110592 inq=0 space=77824 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=328658 window_clamp=435813 rcv_wnd=331776 tcp_stream 40597 [068] 2173.078830: tcp:tcp_rcvbuf_grow: time=50305 rtt_us=50070 copied=270336 inq=0 space=110592 ooo=0 scaling_ratio=219 rcvbuf=509444 rcv_ssthresh=431159 window_clamp=435813 rcv_wnd=434176 tcp_stream 40597 [068] 2173.129137: tcp:tcp_rcvbuf_grow: time=50313 rtt_us=50118 copied=434176 inq=0 space=270336 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=1299511 window_clamp=2102611 rcv_wnd=1302528 tcp_stream 40597 [068] 2173.179451: tcp:tcp_rcvbuf_grow: time=50318 rtt_us=50041 copied=1019904 inq=0 space=434176 ooo=0 scaling_ratio=219 rcvbuf=2457847 rcv_ssthresh=2087445 window_clamp=2102611 rcv_wnd=2088960 Signed-off-by: Eric Dumazet Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-2-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/trace/events/tcp.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 9d2c36c6a0ed..6757233bd064 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -218,6 +218,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, __field(__u32, space) __field(__u32, ooo_space) __field(__u32, rcvbuf) + __field(__u32, rcv_ssthresh) + __field(__u32, window_clamp) + __field(__u32, rcv_wnd) __field(__u8, scaling_ratio) __field(__u16, sport) __field(__u16, dport) @@ -245,6 +248,9 @@ TRACE_EVENT(tcp_rcvbuf_grow, tp->rcv_nxt; __entry->rcvbuf = sk->sk_rcvbuf; + __entry->rcv_ssthresh = tp->rcv_ssthresh; + __entry->window_clamp = tp->window_clamp; + __entry->rcv_wnd = tp->rcv_wnd; __entry->scaling_ratio = tp->scaling_ratio; __entry->sport = ntohs(inet->inet_sport); __entry->dport = ntohs(inet->inet_dport); @@ -264,11 +270,14 @@ TRACE_EVENT(tcp_rcvbuf_grow, ), TP_printk("time=%u rtt_us=%u copied=%u inq=%u space=%u ooo=%u scaling_ratio=%u rcvbuf=%u " + "rcv_ssthresh=%u window_clamp=%u rcv_wnd=%u " "family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 " "saddrv6=%pI6c daddrv6=%pI6c skaddr=%p sock_cookie=%llx", __entry->time, __entry->rtt_us, __entry->copied, __entry->inq, __entry->space, __entry->ooo_space, __entry->scaling_ratio, __entry->rcvbuf, + __entry->rcv_ssthresh, __entry->window_clamp, + __entry->rcv_wnd, show_family_name(__entry->family), __entry->sport, __entry->dport, __entry->saddr, __entry->daddr, From b1e014a1f3275a6f3d0f2b30b8117447fc3915f5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:01 +0100 Subject: [PATCH 242/263] tcp: add newval parameter to tcp_rcvbuf_grow() This patch has no functional change, and prepares the following one. tcp_rcvbuf_grow() will need to have access to tp->rcvq_space.space old and new values. Change mptcp_rcvbuf_grow() in a similar way. Signed-off-by: Eric Dumazet [ Moved 'oldval' declaration to the next patch to avoid warnings at build time. ] Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-3-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 2 +- net/ipv4/tcp_input.c | 14 +++++++------- net/mptcp/protocol.c | 20 ++++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5ca230ed526a..ab20f549b8f9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -370,7 +370,7 @@ void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, int *karg); enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); -void tcp_rcvbuf_grow(struct sock *sk); +void tcp_rcvbuf_grow(struct sock *sk, u32 newval); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 31ea5af49f2d..cb4e07f84ae2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -891,18 +891,20 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, } } -void tcp_rcvbuf_grow(struct sock *sk) +void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + + tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; /* slow start: allow the sender to double its rate. */ - rcvwin = tp->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; @@ -943,9 +945,7 @@ void tcp_rcv_space_adjust(struct sock *sk) trace_tcp_rcvbuf_grow(sk, time); - tp->rcvq_space.space = copied; - - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, copied); new_measure: tp->rcvq_space.seq = tp->copied_seq; @@ -5270,7 +5270,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) } /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - tcp_rcvbuf_grow(sk); + tcp_rcvbuf_grow(sk, tp->rcvq_space.space); } static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a8a3bdf95543..052a0c62023f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -194,17 +194,18 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, * - mptcp does not maintain a msk-level window clamp * - returns true when the receive buffer is actually updated */ -static bool mptcp_rcvbuf_grow(struct sock *sk) +static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - int rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap; + msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; - rcvwin = msk->rcvq_space.space << 1; + rcvwin = newval << 1; if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; @@ -334,7 +335,7 @@ static void mptcp_data_queue_ofo(struct mptcp_sock *msk, struct sk_buff *skb) skb_set_owner_r(skb, sk); /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ if (sk->sk_socket) - mptcp_rcvbuf_grow(sk); + mptcp_rcvbuf_grow(sk, msk->rcvq_space.space); } static void mptcp_init_skb(struct sock *ssk, struct sk_buff *skb, int offset, @@ -2049,10 +2050,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - msk->rcvq_space.space = msk->rcvq_space.copied; - if (mptcp_rcvbuf_grow(sk)) { - int copied = msk->rcvq_space.copied; - + if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { /* Make subflows follow along. If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2065,10 +2063,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); /* subflows can be added before tcp_init_transfer() */ - if (tcp_sk(ssk)->rcvq_space.space) { - tcp_sk(ssk)->rcvq_space.space = copied; - tcp_rcvbuf_grow(ssk); - } + if (tcp_sk(ssk)->rcvq_space.space) + tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); unlock_sock_fast(ssk, slow); } } From aa251c84636c326471ca9d53723816ba8fffe2bf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Oct 2025 12:58:02 +0100 Subject: [PATCH 243/263] tcp: fix too slow tcp_rcvbuf_grow() action While the blamed commits apparently avoided an overshoot, they also limited how fast a sender can increase BDP at each RTT. This is not exactly a revert, we do not add the 16 * tp->advmss cushion we had, and we are keeping the out_of_order_queue contribution. Do the same in mptcp_rcvbuf_grow(). Tested: emulated 50ms rtt (tcp_stream --tcp-tx-delay 50000), cubic 20 second flow. net.ipv4.tcp_rmem set to "4096 131072 67000000" perf record -a -e tcp:tcp_rcvbuf_grow sleep 20 perf script Before: We can see we fail to roughly double RWIN at each RTT. Sender is RWIN limited while CWND is ramping up (before getting tcp_wmem limited). tcp_stream 33793 [010] 825.717525: tcp:tcp_rcvbuf_grow: time=100869 rtt_us=50428 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.768966: tcp:tcp_rcvbuf_grow: time=51447 rtt_us=50362 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=107474 window_clamp=112128 rcv_wnd=106496 tcp_stream 33793 [010] 825.821539: tcp:tcp_rcvbuf_grow: time=52577 rtt_us=50243 copied=114688 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=201096 rcv_ssthresh=167377 window_clamp=172031 rcv_wnd=167936 tcp_stream 33793 [010] 825.871781: tcp:tcp_rcvbuf_grow: time=50248 rtt_us=50237 copied=167936 inq=0 space=114688 ooo=0 scaling_ratio=219 rcvbuf=268129 rcv_ssthresh=224722 window_clamp=229375 rcv_wnd=225280 tcp_stream 33793 [010] 825.922475: tcp:tcp_rcvbuf_grow: time=50698 rtt_us=50183 copied=241664 inq=0 space=167936 ooo=0 scaling_ratio=219 rcvbuf=392617 rcv_ssthresh=331217 window_clamp=335871 rcv_wnd=323584 tcp_stream 33793 [010] 825.973326: tcp:tcp_rcvbuf_grow: time=50855 rtt_us=50213 copied=339968 inq=0 space=241664 ooo=0 scaling_ratio=219 rcvbuf=564986 rcv_ssthresh=478674 window_clamp=483327 rcv_wnd=462848 tcp_stream 33793 [010] 826.023970: tcp:tcp_rcvbuf_grow: time=50647 rtt_us=50248 copied=491520 inq=0 space=339968 ooo=0 scaling_ratio=219 rcvbuf=794811 rcv_ssthresh=671778 window_clamp=679935 rcv_wnd=651264 tcp_stream 33793 [010] 826.074612: tcp:tcp_rcvbuf_grow: time=50648 rtt_us=50227 copied=700416 inq=0 space=491520 ooo=0 scaling_ratio=219 rcvbuf=1149124 rcv_ssthresh=974881 window_clamp=983039 rcv_wnd=942080 tcp_stream 33793 [010] 826.125452: tcp:tcp_rcvbuf_grow: time=50845 rtt_us=50225 copied=987136 inq=8192 space=700416 ooo=0 scaling_ratio=219 rcvbuf=1637502 rcv_ssthresh=1392674 window_clamp=1400831 rcv_wnd=1339392 tcp_stream 33793 [010] 826.175698: tcp:tcp_rcvbuf_grow: time=50250 rtt_us=50198 copied=1347584 inq=0 space=978944 ooo=0 scaling_ratio=219 rcvbuf=2288672 rcv_ssthresh=1949729 window_clamp=1957887 rcv_wnd=1945600 tcp_stream 33793 [010] 826.225947: tcp:tcp_rcvbuf_grow: time=50252 rtt_us=50240 copied=1945600 inq=0 space=1347584 ooo=0 scaling_ratio=219 rcvbuf=3150516 rcv_ssthresh=2687010 window_clamp=2695167 rcv_wnd=2691072 tcp_stream 33793 [010] 826.276175: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50224 copied=2691072 inq=0 space=1945600 ooo=0 scaling_ratio=219 rcvbuf=4548617 rcv_ssthresh=3883041 window_clamp=3891199 rcv_wnd=3887104 tcp_stream 33793 [010] 826.326403: tcp:tcp_rcvbuf_grow: time=50233 rtt_us=50229 copied=3887104 inq=0 space=2691072 ooo=0 scaling_ratio=219 rcvbuf=6291456 rcv_ssthresh=5370482 window_clamp=5382144 rcv_wnd=5373952 tcp_stream 33793 [010] 826.376723: tcp:tcp_rcvbuf_grow: time=50323 rtt_us=50218 copied=5373952 inq=0 space=3887104 ooo=0 scaling_ratio=219 rcvbuf=9087658 rcv_ssthresh=7755537 window_clamp=7774207 rcv_wnd=7757824 tcp_stream 33793 [010] 826.426991: tcp:tcp_rcvbuf_grow: time=50274 rtt_us=50196 copied=7757824 inq=180224 space=5373952 ooo=0 scaling_ratio=219 rcvbuf=12563759 rcv_ssthresh=10729233 window_clamp=10747903 rcv_wnd=10575872 tcp_stream 33793 [010] 826.477229: tcp:tcp_rcvbuf_grow: time=50241 rtt_us=50078 copied=10731520 inq=180224 space=7577600 ooo=0 scaling_ratio=219 rcvbuf=17715667 rcv_ssthresh=15136529 window_clamp=15155199 rcv_wnd=14983168 tcp_stream 33793 [010] 826.527482: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50153 copied=15138816 inq=360448 space=10551296 ooo=0 scaling_ratio=219 rcvbuf=24667870 rcv_ssthresh=21073410 window_clamp=21102591 rcv_wnd=20766720 tcp_stream 33793 [010] 826.577712: tcp:tcp_rcvbuf_grow: time=50234 rtt_us=50228 copied=21073920 inq=0 space=14778368 ooo=0 scaling_ratio=219 rcvbuf=34550339 rcv_ssthresh=29517041 window_clamp=29556735 rcv_wnd=29519872 tcp_stream 33793 [010] 826.627982: tcp:tcp_rcvbuf_grow: time=50275 rtt_us=50220 copied=29519872 inq=540672 space=21073920 ooo=0 scaling_ratio=219 rcvbuf=49268707 rcv_ssthresh=42090625 window_clamp=42147839 rcv_wnd=41627648 tcp_stream 33793 [010] 826.678274: tcp:tcp_rcvbuf_grow: time=50296 rtt_us=50185 copied=42053632 inq=761856 space=28979200 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57238168 window_clamp=57316406 rcv_wnd=56606720 tcp_stream 33793 [010] 826.728627: tcp:tcp_rcvbuf_grow: time=50357 rtt_us=50128 copied=43913216 inq=851968 space=41291776 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56524800 tcp_stream 33793 [010] 827.131364: tcp:tcp_rcvbuf_grow: time=50239 rtt_us=50127 copied=43843584 inq=655360 space=43061248 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56696832 tcp_stream 33793 [010] 827.181613: tcp:tcp_rcvbuf_grow: time=50254 rtt_us=50115 copied=43843584 inq=524288 space=43188224 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56807424 tcp_stream 33793 [010] 828.339635: tcp:tcp_rcvbuf_grow: time=50283 rtt_us=50110 copied=43843584 inq=458752 space=43319296 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56864768 tcp_stream 33793 [010] 828.440350: tcp:tcp_rcvbuf_grow: time=50404 rtt_us=50099 copied=43843584 inq=393216 space=43384832 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=56922112 tcp_stream 33793 [010] 829.195106: tcp:tcp_rcvbuf_grow: time=50154 rtt_us=50077 copied=43843584 inq=196608 space=43450368 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57290728 window_clamp=57316406 rcv_wnd=57090048 After: It takes few steps to increase RWIN. Sender is no longer RWIN limited. tcp_stream 50826 [010] 935.634212: tcp:tcp_rcvbuf_grow: time=100788 rtt_us=50315 copied=49152 inq=0 space=40960 ooo=0 scaling_ratio=219 rcvbuf=131072 rcv_ssthresh=103970 window_clamp=112128 rcv_wnd=106496 tcp_stream 50826 [010] 935.685642: tcp:tcp_rcvbuf_grow: time=51437 rtt_us=50361 copied=86016 inq=0 space=49152 ooo=0 scaling_ratio=219 rcvbuf=160875 rcv_ssthresh=132969 window_clamp=137623 rcv_wnd=131072 tcp_stream 50826 [010] 935.738299: tcp:tcp_rcvbuf_grow: time=52660 rtt_us=50256 copied=139264 inq=0 space=86016 ooo=0 scaling_ratio=219 rcvbuf=502741 rcv_ssthresh=411497 window_clamp=430079 rcv_wnd=413696 tcp_stream 50826 [010] 935.788544: tcp:tcp_rcvbuf_grow: time=50249 rtt_us=50233 copied=307200 inq=0 space=139264 ooo=0 scaling_ratio=219 rcvbuf=728690 rcv_ssthresh=618717 window_clamp=623371 rcv_wnd=618496 tcp_stream 50826 [010] 935.838796: tcp:tcp_rcvbuf_grow: time=50258 rtt_us=50202 copied=618496 inq=0 space=307200 ooo=0 scaling_ratio=219 rcvbuf=2450338 rcv_ssthresh=1855709 window_clamp=2096187 rcv_wnd=1859584 tcp_stream 50826 [010] 935.889140: tcp:tcp_rcvbuf_grow: time=50347 rtt_us=50166 copied=1261568 inq=0 space=618496 ooo=0 scaling_ratio=219 rcvbuf=4376503 rcv_ssthresh=3725291 window_clamp=3743961 rcv_wnd=3706880 tcp_stream 50826 [010] 935.939435: tcp:tcp_rcvbuf_grow: time=50300 rtt_us=50185 copied=2478080 inq=24576 space=1261568 ooo=0 scaling_ratio=219 rcvbuf=9082648 rcv_ssthresh=7733731 window_clamp=7769921 rcv_wnd=7692288 tcp_stream 50826 [010] 935.989681: tcp:tcp_rcvbuf_grow: time=50251 rtt_us=50221 copied=4915200 inq=114688 space=2453504 ooo=0 scaling_ratio=219 rcvbuf=16574936 rcv_ssthresh=14108110 window_clamp=14179339 rcv_wnd=14024704 tcp_stream 50826 [010] 936.039967: tcp:tcp_rcvbuf_grow: time=50289 rtt_us=50279 copied=9830400 inq=114688 space=4800512 ooo=0 scaling_ratio=219 rcvbuf=32695050 rcv_ssthresh=27896187 window_clamp=27969593 rcv_wnd=27815936 tcp_stream 50826 [010] 936.090172: tcp:tcp_rcvbuf_grow: time=50211 rtt_us=50200 copied=19841024 inq=114688 space=9715712 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.140430: tcp:tcp_rcvbuf_grow: time=50262 rtt_us=50197 copied=39501824 inq=114688 space=19726336 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57245176 window_clamp=57316406 rcv_wnd=57163776 tcp_stream 50826 [010] 936.190527: tcp:tcp_rcvbuf_grow: time=50101 rtt_us=50071 copied=43655168 inq=262144 space=39387136 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.240719: tcp:tcp_rcvbuf_grow: time=50197 rtt_us=50057 copied=43843584 inq=262144 space=43393024 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57032704 tcp_stream 50826 [010] 936.341271: tcp:tcp_rcvbuf_grow: time=50297 rtt_us=50123 copied=43843584 inq=131072 space=43581440 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57147392 tcp_stream 50826 [010] 936.642503: tcp:tcp_rcvbuf_grow: time=50131 rtt_us=50084 copied=43843584 inq=0 space=43712512 ooo=0 scaling_ratio=219 rcvbuf=67000000 rcv_ssthresh=57259192 window_clamp=57316406 rcv_wnd=57262080 Fixes: 65c5287892e9 ("tcp: fix sk_rcvbuf overshoot") Fixes: e118cdc34dd1 ("mptcp: rcvbuf auto-tuning improvement") Reported-by: Neal Cardwell Signed-off-by: Eric Dumazet Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/589 Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Neal Cardwell Link: https://patch.msgid.link/20251028-net-tcp-recv-autotune-v3-4-74b43ba4c84c@kernel.org Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_input.c | 11 +++++++++-- net/mptcp/protocol.c | 10 +++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb4e07f84ae2..e4a979b75cc6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -895,17 +895,24 @@ void tcp_rcvbuf_grow(struct sock *sk, u32 newval) { const struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = tp->rcvq_space.space; tp->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return; - /* slow start: allow the sender to double its rate. */ + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) rcvwin += TCP_SKB_CB(tp->ooo_last_skb)->end_seq - tp->rcv_nxt; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 052a0c62023f..875027b9319c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -198,15 +198,23 @@ static bool mptcp_rcvbuf_grow(struct sock *sk, u32 newval) { struct mptcp_sock *msk = mptcp_sk(sk); const struct net *net = sock_net(sk); - u32 rcvwin, rcvbuf, cap; + u32 rcvwin, rcvbuf, cap, oldval; + u64 grow; + oldval = msk->rcvq_space.space; msk->rcvq_space.space = newval; if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) return false; + /* DRS is always one RTT late. */ rcvwin = newval << 1; + /* slow start: allow the sender to double its rate. */ + grow = (u64)rcvwin * (newval - oldval); + do_div(grow, oldval); + rcvwin += grow << 1; + if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; From a4384d786e38d5ff172f0908aae01c2c30719245 Mon Sep 17 00:00:00 2001 From: Abdun Nihaal Date: Tue, 28 Oct 2025 21:38:41 +0530 Subject: [PATCH 244/263] nfp: xsk: fix memory leak in nfp_net_alloc() In nfp_net_alloc(), the memory allocated for xsk_pools is not freed in the subsequent error paths, leading to a memory leak. Fix that by freeing it in the error path. Fixes: 6402528b7a0b ("nfp: xsk: add AF_XDP zero-copy Rx and Tx support") Signed-off-by: Abdun Nihaal Link: https://patch.msgid.link/20251028160845.126919-1-nihaal@cse.iitm.ac.in Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 132626a3f9f7..9ef72f294117 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2557,14 +2557,16 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, err = nfp_net_tlv_caps_parse(&nn->pdev->dev, nn->dp.ctrl_bar, &nn->tlv_caps); if (err) - goto err_free_nn; + goto err_free_xsk_pools; err = nfp_ccm_mbox_alloc(nn); if (err) - goto err_free_nn; + goto err_free_xsk_pools; return nn; +err_free_xsk_pools: + kfree(nn->dp.xsk_pools); err_free_nn: if (nn->dp.netdev) free_netdev(nn->dp.netdev); From a43303809868b22bd1303739ba334e982b234d45 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 28 Oct 2025 20:20:27 +0700 Subject: [PATCH 245/263] Documentation: netconsole: Remove obsolete contact people Breno Leitao has been listed in MAINTAINERS as netconsole maintainer since 7c938e438c56db ("MAINTAINERS: make Breno the netconsole maintainer"), but the documentation says otherwise that bug reports should be sent to original netconsole authors. Remove obsolate contact info. Signed-off-by: Bagas Sanjaya Reviewed-by: Randy Dunlap Tested-by: Randy Dunlap Link: https://patch.msgid.link/20251028132027.48102-1-bagasdotme@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/networking/netconsole.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index 59cb9982afe6..2555e75e5cc1 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -19,9 +19,6 @@ Userdata append support by Matthew Wood , Jan 22 2024 Sysdata append support by Breno Leitao , Jan 15 2025 -Please send bug reports to Matt Mackall -Satyam Sharma , and Cong Wang - Introduction: ============= From 00764aa5c9bbb2044eb04d6d78584a436666b231 Mon Sep 17 00:00:00 2001 From: Gustavo Luiz Duarte Date: Tue, 28 Oct 2025 15:06:32 -0700 Subject: [PATCH 246/263] netconsole: Fix race condition in between reader and writer of userdata The update_userdata() function constructs the complete userdata string in nt->extradata_complete and updates nt->userdata_length. This data is then read by write_msg() and write_ext_msg() when sending netconsole messages. However, update_userdata() was not holding target_list_lock during this process, allowing concurrent message transmission to read partially updated userdata. This race condition could result in netconsole messages containing incomplete or inconsistent userdata - for example, reading the old userdata_length with new extradata_complete content, or vice versa, leading to truncated or corrupted output. Fix this by acquiring target_list_lock with spin_lock_irqsave() before updating extradata_complete and userdata_length, and releasing it after both fields are fully updated. This ensures that readers see a consistent view of the userdata, preventing corruption during concurrent access. The fix aligns with the existing locking pattern used throughout the netconsole code, where target_list_lock protects access to target fields including buf[] and msgcounter that are accessed during message transmission. Also get rid of the unnecessary variable complete_idx, which makes it easier to bail out of update_userdata(). Fixes: df03f830d099 ("net: netconsole: cache userdata formatted string in netconsole_target") Signed-off-by: Gustavo Luiz Duarte Link: https://patch.msgid.link/20251028-netconsole-fix-race-v4-1-63560b0ae1a0@meta.com Signed-off-by: Jakub Kicinski --- drivers/net/netconsole.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 194570443493..5d8d0214786c 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -886,8 +886,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf) static void update_userdata(struct netconsole_target *nt) { - int complete_idx = 0, child_count = 0; struct list_head *entry; + int child_count = 0; + unsigned long flags; + + spin_lock_irqsave(&target_list_lock, flags); /* Clear the current string in case the last userdatum was deleted */ nt->userdata_length = 0; @@ -897,8 +900,11 @@ static void update_userdata(struct netconsole_target *nt) struct userdatum *udm_item; struct config_item *item; - if (WARN_ON_ONCE(child_count >= MAX_EXTRADATA_ITEMS)) - break; + if (child_count >= MAX_EXTRADATA_ITEMS) { + spin_unlock_irqrestore(&target_list_lock, flags); + WARN_ON_ONCE(1); + return; + } child_count++; item = container_of(entry, struct config_item, ci_entry); @@ -912,12 +918,11 @@ static void update_userdata(struct netconsole_target *nt) * one entry length (1/MAX_EXTRADATA_ITEMS long), entry count is * checked to not exceed MAX items with child_count above */ - complete_idx += scnprintf(&nt->extradata_complete[complete_idx], - MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", - item->ci_name, udm_item->value); + nt->userdata_length += scnprintf(&nt->extradata_complete[nt->userdata_length], + MAX_EXTRADATA_ENTRY_LEN, " %s=%s\n", + item->ci_name, udm_item->value); } - nt->userdata_length = strnlen(nt->extradata_complete, - sizeof(nt->extradata_complete)); + spin_unlock_irqrestore(&target_list_lock, flags); } static ssize_t userdatum_value_store(struct config_item *item, const char *buf, From 27b0e701d3872ba59c5b579a9e8a02ea49ad3d3b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:52 +0100 Subject: [PATCH 247/263] mptcp: drop bogus optimization in __mptcp_check_push() Accessing the transmit queue without owning the msk socket lock is inherently racy, hence __mptcp_check_push() could actually quit early even when there is pending data. That in turn could cause unexpected tx lock and timeout. Dropping the early check avoids the race, implicitly relaying on later tests under the relevant lock. With such change, all the other mptcp_send_head() call sites are now under the msk socket lock and we can additionally drop the now unneeded annotation on the transmit head pointer accesses. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-1-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 11 ++++------- net/mptcp/protocol.h | 2 +- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 875027b9319c..655a2a45224f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1007,7 +1007,7 @@ static void __mptcp_clean_una(struct sock *sk) if (WARN_ON_ONCE(!msk->recovery)) break; - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); } dfrag_clear(sk, dfrag); @@ -1552,7 +1552,7 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, mptcp_update_post_push(msk, dfrag, ret); } - WRITE_ONCE(msk->first_pending, mptcp_send_next(sk)); + msk->first_pending = mptcp_send_next(sk); if (msk->snd_burst <= 0 || !sk_stream_memory_free(ssk) || @@ -1912,7 +1912,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) get_page(dfrag->page); list_add_tail(&dfrag->list, &msk->rtx_queue); if (!msk->first_pending) - WRITE_ONCE(msk->first_pending, dfrag); + msk->first_pending = dfrag; } pr_debug("msk=%p dfrag at seq=%llu len=%u sent=%u new=%d\n", msk, dfrag->data_seq, dfrag->data_len, dfrag->already_sent, @@ -2882,7 +2882,7 @@ static void __mptcp_clear_xmit(struct sock *sk) struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - WRITE_ONCE(msk->first_pending, NULL); + msk->first_pending = NULL; list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) dfrag_clear(sk, dfrag); } @@ -3422,9 +3422,6 @@ void __mptcp_data_acked(struct sock *sk) void __mptcp_check_push(struct sock *sk, struct sock *ssk) { - if (!mptcp_send_head(sk)) - return; - if (!sock_owned_by_user(sk)) __mptcp_subflow_push_pending(sk, ssk, false); else diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 52f9cfa4ce95..379a88e14e8d 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -414,7 +414,7 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) { const struct mptcp_sock *msk = mptcp_sk(sk); - return READ_ONCE(msk->first_pending); + return msk->first_pending; } static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) From 8e04ce45a8db7a080220e86e249198fa676b83dc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:53 +0100 Subject: [PATCH 248/263] mptcp: fix MSG_PEEK stream corruption If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the receive queue and recvmsg() need to waits for more data - i.e. it's a blocking one - upon arrival of the next packet the MPTCP protocol will start again copying the oldest data present in the receive queue, corrupting the data stream. Address the issue explicitly tracking the peeked sequence number, restarting from the last peeked byte. Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Geliang Tang Tested-by: Geliang Tang Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-2-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 655a2a45224f..2535788569ab 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1945,22 +1945,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied); -static int __mptcp_recvmsg_mskq(struct sock *sk, - struct msghdr *msg, - size_t len, int flags, +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg, + size_t len, int flags, int copied_total, struct scm_timestamping_internal *tss, int *cmsg_flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct sk_buff *skb, *tmp; + int total_data_len = 0; int copied = 0; skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { - u32 offset = MPTCP_SKB_CB(skb)->offset; + u32 delta, offset = MPTCP_SKB_CB(skb)->offset; u32 data_len = skb->len - offset; - u32 count = min_t(size_t, len - copied, data_len); + u32 count; int err; + if (flags & MSG_PEEK) { + /* skip already peeked skbs */ + if (total_data_len + data_len <= copied_total) { + total_data_len += data_len; + continue; + } + + /* skip the already peeked data in the current skb */ + delta = copied_total - total_data_len; + offset += delta; + data_len -= delta; + } + + count = min_t(size_t, len - copied, data_len); if (!(flags & MSG_TRUNC)) { err = skb_copy_datagram_msg(skb, offset, msg, count); if (unlikely(err < 0)) { @@ -1977,16 +1991,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, copied += count; - if (count < data_len) { - if (!(flags & MSG_PEEK)) { + if (!(flags & MSG_PEEK)) { + msk->bytes_consumed += count; + if (count < data_len) { MPTCP_SKB_CB(skb)->offset += count; MPTCP_SKB_CB(skb)->map_seq += count; - msk->bytes_consumed += count; + break; } - break; - } - if (!(flags & MSG_PEEK)) { /* avoid the indirect call, we know the destructor is sock_rfree */ skb->destructor = NULL; skb->sk = NULL; @@ -1994,7 +2006,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk, sk_mem_uncharge(sk, skb->truesize); __skb_unlink(skb, &sk->sk_receive_queue); skb_attempt_defer_free(skb); - msk->bytes_consumed += count; } if (copied >= len) @@ -2191,7 +2202,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, while (copied < len) { int err, bytes_read; - bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags); + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, + copied, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; From a824084b98d8a1dbd6e85d0842a8eb5e73467f59 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:54 +0100 Subject: [PATCH 249/263] mptcp: restore window probe Since commit 72377ab2d671 ("mptcp: more conservative check for zero probes") the MPTCP-level zero window probe check is always disabled, as the TCP-level write queue always contains at least the newly allocated skb. Refine the relevant check tacking in account that the above condition and that such skb can have zero length. Fixes: 72377ab2d671 ("mptcp: more conservative check for zero probes") Cc: stable@vger.kernel.org Reported-by: Geliang Tang Closes: https://lore.kernel.org/d0a814c364e744ca6b836ccd5b6e9146882e8d42.camel@kernel.org Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-3-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 2535788569ab..5d8714adae6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1299,7 +1299,12 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (copy == 0) { u64 snd_una = READ_ONCE(msk->snd_una); - if (snd_una != msk->snd_nxt || tcp_write_queue_tail(ssk)) { + /* No need for zero probe if there are any data pending + * either at the msk or ssk level; skb is the current write + * queue tail and can be empty at this point. + */ + if (snd_una != msk->snd_nxt || skb->len || + skb != tcp_send_head(ssk)) { tcp_remove_empty_skb(ssk); return 0; } From fe11dfa10919ce594682c76f5f648a0840d80a2b Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 28 Oct 2025 09:16:55 +0100 Subject: [PATCH 250/263] mptcp: zero window probe mib Explicitly account for MPTCP-level zero windows probe, to catch hopefully earlier issues alike the one addressed by the previous patch. Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Tested-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251028-net-mptcp-send-timeout-v1-4-38ffff5a9ec8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 1 + net/mptcp/mib.h | 1 + net/mptcp/protocol.c | 1 + 3 files changed, 3 insertions(+) diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 6003e47c770a..171643815076 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -85,6 +85,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK), SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK), SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED), + SNMP_MIB_ITEM("WinProbe", MPTCP_MIB_WINPROBE), }; /* mptcp_mib_alloc - allocate percpu mib counters diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 309bac6fea32..a1d3e9369fbb 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -88,6 +88,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */ MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */ MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */ + MPTCP_MIB_WINPROBE, /* MPTCP-level zero window probe */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5d8714adae6c..2d6b8de35c44 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1355,6 +1355,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, mpext->dsn64); if (zero_window_probe) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_WINPROBE); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; mpext->frozen = 1; if (READ_ONCE(msk->csum_enabled)) From dc89548c6926d68dfdda11bebc1a5258bc41d887 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 27 Oct 2025 00:43:16 +0800 Subject: [PATCH 251/263] net: usb: asix_devices: Check return value of usbnet_get_endpoints The code did not check the return value of usbnet_get_endpoints. Add checks and return the error if it fails to transfer the error. Found via static anlaysis and this is similar to commit 07161b2416f7 ("sr9800: Add check for usbnet_get_endpoints"). Fixes: 933a27d39e0e ("USB: asix - Add AX88178 support and many other changes") Fixes: 2e55cc7210fe ("[PATCH] USB: usbnet (3/9) module for ASIX Ethernet adapters") Cc: stable@vger.kernel.org Signed-off-by: Miaoqian Lin Link: https://patch.msgid.link/20251026164318.57624-1-linmq006@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/usb/asix_devices.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 85bd5d845409..232bbd79a4de 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -230,7 +230,9 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) int i; unsigned long gpio_bits = dev->driver_info->data; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + goto out; /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { @@ -848,7 +850,9 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; - usbnet_get_endpoints(dev, intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, buf)) { @@ -1281,7 +1285,9 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) int ret; u8 buf[ETH_ALEN] = {0}; - usbnet_get_endpoints(dev,intf); + ret = usbnet_get_endpoints(dev, intf); + if (ret) + return ret; /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); From 53110232c95ff56067fd96c75a1a1c53d10dcd98 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Sun, 26 Oct 2025 22:20:19 +0200 Subject: [PATCH 252/263] net/mlx5: Don't zero user_count when destroying FDB tables esw->user_count tracks how many TC rules are added on an esw via mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) esw.user_count was unconditionally set to 0 in esw_destroy_legacy_fdb_table and esw_destroy_offloads_fdb_tables. These two together can lead to the following sequence of events: 1. echo 1 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -...-> esw_create_legacy_table -> atomic64_set(&esw->user_count, 0) 2. tc qdisc add dev eth2 ingress && \ tc filter replace dev eth2 pref 1 protocol ip chain 0 ingress \ handle 1 flower action ct nat zone 64000 pipe - mlx5e_configure_flower -> mlx5_esw_get -> atomic64_inc(&esw->user_count) 3. echo 0 > /sys/class/net/eth2/device/sriov_numvfs - mlx5_core_sriov_configure -..-> esw_destroy_legacy_fdb_table -> atomic64_set(&esw->user_count, 0) 4. devlink dev eswitch set pci/0000:08:00.0 mode switchdev - mlx5_devlink_eswitch_mode_set -> mlx5_esw_try_lock -> atomic64_read(&esw->user_count) == 0 - then proceed to a WARN_ON in: esw_offloads_start -> mlx5_eswitch_enable_locke -> esw_offloads_enable -> mlx5_esw_offloads_rep_load -> mlx5e_vport_rep_load -> mlx5e_netdev_change_profile -> mlx5e_detach_netdev -> mlx5e_cleanup_nic_rx -> mlx5e_tc_nic_cleanup -> mlx5e_mod_hdr_tbl_destroy Fix this by not clearing out the user_count when destroying FDB tables, so that the check in mlx5_esw_try_lock can prevent the mode change when there are TC rules configured, as originally intended. Fixes: 2318b8bb94a3 ("net/mlx5: E-switch, Destroy legacy fdb table when needed") Signed-off-by: Cosmin Ratiu Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Reviewed-by: Simon Horman Link: https://patch.msgid.link/1761510019-938772-1-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 76382626ad41..929adeb50a98 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -66,7 +66,6 @@ static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) esw->fdb_table.legacy.addr_grp = NULL; esw->fdb_table.legacy.allmulti_grp = NULL; esw->fdb_table.legacy.promisc_grp = NULL; - atomic64_set(&esw->user_count, 0); } static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 34749814f19b..44a142a041b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1978,7 +1978,6 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); - atomic64_set(&esw->user_count, 0); } static int esw_get_nr_ft_offloads_steering_src_ports(struct mlx5_eswitch *esw) From da2522df3fcc6f57068470cbdcd6516d9eb76b37 Mon Sep 17 00:00:00 2001 From: Jinliang Wang Date: Sun, 26 Oct 2025 23:55:30 -0700 Subject: [PATCH 253/263] net: mctp: Fix tx queue stall The tx queue can become permanently stuck in a stopped state due to a race condition between the URB submission path and its completion callback. The URB completion callback can run immediately after usb_submit_urb() returns, before the submitting function calls netif_stop_queue(). If this occurs, the queue state management becomes desynchronized, leading to a stall where the queue is never woken. Fix this by moving the netif_stop_queue() call to before submitting the URB. This closes the race window by ensuring the network stack is aware the queue is stopped before the URB completion can possibly run. Fixes: 0791c0327a6e ("net: mctp: Add MCTP USB transport driver") Signed-off-by: Jinliang Wang Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20251027065530.2045724-1-jinliangw@google.com Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-usb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/mctp/mctp-usb.c b/drivers/net/mctp/mctp-usb.c index 36ccc53b1797..ef860cfc629f 100644 --- a/drivers/net/mctp/mctp-usb.c +++ b/drivers/net/mctp/mctp-usb.c @@ -96,11 +96,13 @@ static netdev_tx_t mctp_usb_start_xmit(struct sk_buff *skb, skb->data, skb->len, mctp_usb_out_complete, skb); + /* Stops TX queue first to prevent race condition with URB complete */ + netif_stop_queue(dev); rc = usb_submit_urb(urb, GFP_ATOMIC); - if (rc) + if (rc) { + netif_wake_queue(dev); goto err_drop; - else - netif_stop_queue(dev); + } return NETDEV_TX_OK; From 9311e9540a8b406d9f028aa87fb072a3819d4c82 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Mon, 27 Oct 2025 17:57:10 +0800 Subject: [PATCH 254/263] selftests: net: use BASH for bareudp testing In bareudp.sh, this script uses /bin/sh and it will load another lib.sh BASH script at the very beginning. But on some operating systems like Ubuntu, /bin/sh is actually pointed to DASH, thus it will try to run BASH commands with DASH and consequently leads to syntax issues: # ./bareudp.sh: 4: ./lib.sh: Bad substitution # ./bareudp.sh: 5: ./lib.sh: source: not found # ./bareudp.sh: 24: ./lib.sh: Syntax error: "(" unexpected Fix this by explicitly using BASH for bareudp.sh. This fixes test execution failures on systems where /bin/sh is not BASH. Reported-by: Edoardo Canepa Link: https://bugs.launchpad.net/bugs/2129812 Signed-off-by: Po-Hsu Lin Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20251027095710.2036108-2-po-hsu.lin@canonical.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bareudp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index 4046131e7888..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. From 298574936a6c4ebbe655e15d971ddb1a96c7dc0b Mon Sep 17 00:00:00 2001 From: Thanh Quan Date: Mon, 27 Oct 2025 15:02:43 +0100 Subject: [PATCH 255/263] net: phy: dp83869: fix STRAP_OPMODE bitmask According to the TI DP83869HM datasheet Revision D (June 2025), section 7.6.1.41 STRAP_STS Register, the STRAP_OPMODE bitmask is bit [11:9]. Fix this. In case the PHY is auto-detected via PHY ID registers, or not described in DT, or, in case the PHY is described in DT but the optional DT property "ti,op-mode" is not present, then the driver reads out the PHY functional mode (RGMII, SGMII, ...) from hardware straps. Currently, all upstream users of this PHY specify both DT compatible string "ethernet-phy-id2000.a0f1" and ti,op-mode = property, therefore it seems no upstream users are affected by this bug. The driver currently interprets bits [2:0] of STRAP_STS register as PHY functional mode. Those bits are controlled by ANEG_DIS, ANEGSEL_0 straps and an always-zero reserved bit. Systems that use RGMII-to-Copper functional mode are unlikely to disable auto-negotiation via ANEG_DIS strap, or change auto-negotiation behavior via ANEGSEL_0 strap. Therefore, even with this bug in place, the STRAP_STS register content is likely going to be interpreted by the driver as RGMII-to-Copper mode. However, for a system with PHY functional mode strapping set to other mode than RGMII-to-Copper, the driver is likely to misinterpret the strapping as RGMII-to-Copper and misconfigure the PHY. For example, on a system with SGMII-to-Copper strapping, the STRAP_STS register reads as 0x0c20, but the PHY ends up being configured for incompatible RGMII-to-Copper mode. Fixes: 0eaf8ccf2047 ("net: phy: dp83869: Set opmode from straps") Reviewed-by: Andrew Lunn Signed-off-by: Thanh Quan Signed-off-by: Hai Pham Signed-off-by: Marek Vasut # Port from U-Boot to Linux Link: https://patch.msgid.link/20251027140320.8996-1-marek.vasut+renesas@mailbox.org Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83869.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index a2cd1cc35cde..1f381d7b13ff 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -84,7 +84,7 @@ #define DP83869_CLK_DELAY_DEF 7 /* STRAP_STS1 bits */ -#define DP83869_STRAP_OP_MODE_MASK GENMASK(2, 0) +#define DP83869_STRAP_OP_MODE_MASK GENMASK(11, 9) #define DP83869_STRAP_STS1_RESERVED BIT(11) #define DP83869_STRAP_MIRROR_ENABLED BIT(12) @@ -528,7 +528,7 @@ static int dp83869_set_strapped_mode(struct phy_device *phydev) if (val < 0) return val; - dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK; + dp83869->mode = FIELD_GET(DP83869_STRAP_OP_MODE_MASK, val); return 0; } From 34892cfec0c2d96787c4be7bda0d5f18d7dacf85 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:01 +0200 Subject: [PATCH 256/263] net: tls: Change async resync helpers argument Update tls_offload_rx_resync_async_request_start() and tls_offload_rx_resync_async_request_end() to get a struct tls_offload_resync_async parameter directly, rather than extracting it from struct sock. This change aligns the function signatures with the upcoming tls_offload_rx_resync_async_request_cancel() helper, which will be introduced in a subsequent patch. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-2-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 9 ++++++-- include/net/tls.h | 21 +++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index d7a11ff9bbdb..5fbc92269585 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -425,12 +425,14 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; u32 hw_seq; priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); + rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) goto out; @@ -447,7 +449,8 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); + tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: mlx5e_ktls_priv_rx_put(priv_rx); @@ -482,6 +485,7 @@ static bool resync_queue_get_psv(struct sock *sk) static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)(skb->data); + struct tls_offload_resync_async *resync_async; struct net_device *netdev = rq->netdev; struct net *net = dev_net(netdev); struct sock *sk = NULL; @@ -527,7 +531,8 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) seq = th->seq; datalen = skb->len - depth; - tls_offload_rx_resync_async_request_start(sk, seq, datalen); + resync_async = tls_offload_ctx_rx(tls_get_ctx(sk))->resync_async; + tls_offload_rx_resync_async_request_start(resync_async, seq, datalen); rq->stats->tls_resync_req_start++; unref: diff --git a/include/net/tls.h b/include/net/tls.h index 857340338b69..b90f3b675c3c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -451,25 +451,20 @@ static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq) /* Log all TLS record header TCP sequences in [seq, seq+len] */ static inline void -tls_offload_rx_resync_async_request_start(struct sock *sk, __be32 seq, u16 len) +tls_offload_rx_resync_async_request_start(struct tls_offload_resync_async *resync_async, + __be32 seq, u16 len) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, ((u64)ntohl(seq) << 32) | + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | ((u64)len << 16) | RESYNC_REQ | RESYNC_REQ_ASYNC); - rx_ctx->resync_async->loglen = 0; - rx_ctx->resync_async->rcd_delta = 0; + resync_async->loglen = 0; + resync_async->rcd_delta = 0; } static inline void -tls_offload_rx_resync_async_request_end(struct sock *sk, __be32 seq) +tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_async, + __be32 seq) { - struct tls_context *tls_ctx = tls_get_ctx(sk); - struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx); - - atomic64_set(&rx_ctx->resync_async->req, - ((u64)ntohl(seq) << 32) | RESYNC_REQ); + atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } static inline void From c15d5c62ab313c19121f10e25d4fec852bd1c40c Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:02 +0200 Subject: [PATCH 257/263] net: tls: Cancel RX async resync request on rcd_delta overflow When a netdev issues a RX async resync request for a TLS connection, the TLS module handles it by logging record headers and attempting to match them to the tcp_sn provided by the device. If a match is found, the TLS module approves the tcp_sn for resynchronization. While waiting for a device response, the TLS module also increments rcd_delta each time a new TLS record is received, tracking the distance from the original resync request. However, if the device response is delayed or fails (e.g due to unstable connection and device getting out of tracking, hardware errors, resource exhaustion etc.), the TLS module keeps logging and incrementing, which can lead to a WARN() when rcd_delta exceeds the threshold. To address this, introduce tls_offload_rx_resync_async_request_cancel() to explicitly cancel resync requests when a device response failure is detected. Call this helper also as a final safeguard when rcd_delta crosses its threshold, as reaching this point implies that earlier cancellation did not occur. Signed-off-by: Shahar Shitrit Reviewed-by: Sabrina Dubroca Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-3-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ net/tls/tls_device.c | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index b90f3b675c3c..c7bcdb3afad7 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -467,6 +467,12 @@ tls_offload_rx_resync_async_request_end(struct tls_offload_resync_async *resync_ atomic64_set(&resync_async->req, ((u64)ntohl(seq) << 32) | RESYNC_REQ); } +static inline void +tls_offload_rx_resync_async_request_cancel(struct tls_offload_resync_async *resync_async) +{ + atomic64_set(&resync_async->req, 0); +} + static inline void tls_offload_rx_resync_set_type(struct sock *sk, enum tls_offload_sync_type type) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index a64ae15b1a60..71734411ff4c 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -723,8 +723,10 @@ tls_device_rx_resync_async(struct tls_offload_resync_async *resync_async, /* shouldn't get to wraparound: * too long in async stage, something bad happened */ - if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) + if (WARN_ON_ONCE(resync_async->rcd_delta == USHRT_MAX)) { + tls_offload_rx_resync_async_request_cancel(resync_async); return false; + } /* asynchronous stage: log all headers seq such that * req_seq <= seq <= end_seq, and wait for real resync request From 426e9da3b28404b1edcbae401231fb378150d99d Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Sun, 26 Oct 2025 22:03:03 +0200 Subject: [PATCH 258/263] net/mlx5e: kTLS, Cancel RX async resync request in error flows When device loses track of TLS records, it attempts to resync by monitoring records and requests an asynchronous resynchronization from software for this TLS connection. The TLS module handles such device RX resync requests by logging record headers and comparing them with the record tcp_sn when provided by the device. It also increments rcd_delta to track how far the current record tcp_sn is from the tcp_sn of the original resync request. If the device later responds with a matching tcp_sn, the TLS module approves the tcp_sn for resync. However, the device response may be delayed or never arrive, particularly due to traffic-related issues such as packet drops or reordering. In such cases, the TLS module remains unaware that resync will not complete, and continues performing unnecessary work by logging headers and incrementing rcd_delta, which can eventually exceed the threshold and trigger a WARN(). For example, this was observed when the device got out of tracking, causing mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading to the rcd_delta warning. To address this, call tls_offload_rx_resync_async_request_cancel() to cancel the resync request and stop resync tracking in such error cases. Also, increment the tls_resync_req_skip counter to track these cancellations. Fixes: 0419d8c9d8f8 ("net/mlx5e: kTLS, Add kTLS RX resync support") Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/1761508983-937977-4-git-send-email-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en_accel/ktls_rx.c | 34 ++++++++++++++++--- .../mellanox/mlx5/core/en_accel/ktls_txrx.h | 4 +++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 5fbc92269585..da2d1eb52c13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -320,7 +320,6 @@ resync_post_get_progress_params(struct mlx5e_icosq *sq, err_free: kfree(buf); err_out: - priv_rx->rq_stats->tls_resync_req_skip++; return err; } @@ -339,14 +338,19 @@ static void resync_handle_work(struct work_struct *work) if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { mlx5e_ktls_priv_rx_put(priv_rx); + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); return; } c = resync->priv->channels.c[priv_rx->rxq]; sq = &c->async_icosq; - if (resync_post_get_progress_params(sq, priv_rx)) + if (resync_post_get_progress_params(sq, priv_rx)) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&resync->core); mlx5e_ktls_priv_rx_put(priv_rx); + } } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -425,6 +429,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, { struct mlx5e_ktls_rx_resync_buf *buf = wi->tls_get_params.buf; struct mlx5e_ktls_offload_context_rx *priv_rx; + struct tls_offload_resync_async *async_resync; struct tls_offload_context_rx *rx_ctx; u8 tracker_state, auth_state, *ctx; struct device *dev; @@ -433,8 +438,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, priv_rx = buf->priv_rx; dev = mlx5_core_dma_dev(sq->channel->mdev); rx_ctx = tls_offload_ctx_rx(tls_get_ctx(priv_rx->sk)); - if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) + async_resync = rx_ctx->resync_async; + if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; + } dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); @@ -445,11 +454,12 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING || auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) { priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(async_resync); goto out; } hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn); - tls_offload_rx_resync_async_request_end(rx_ctx->resync_async, + tls_offload_rx_resync_async_request_end(async_resync, cpu_to_be32(hw_seq)); priv_rx->rq_stats->tls_resync_req_end++; out: @@ -475,8 +485,10 @@ static bool resync_queue_get_psv(struct sock *sk) resync = &priv_rx->resync; mlx5e_ktls_priv_rx_get(priv_rx); - if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) + if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) { mlx5e_ktls_priv_rx_put(priv_rx); + return false; + } return true; } @@ -561,6 +573,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk, resync_handle_seq_match(priv_rx, c); } +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi) +{ + struct mlx5e_ktls_offload_context_rx *priv_rx; + struct mlx5e_ktls_rx_resync_buf *buf; + + buf = wi->tls_get_params.buf; + priv_rx = buf->priv_rx; + priv_rx->rq_stats->tls_resync_req_skip++; + tls_offload_rx_resync_async_request_cancel(&priv_rx->resync.core); +} + /* End of resync section */ void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index f87b65c560ea..cb08799769ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc); + +void +mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi); + static inline bool mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c79adc51a04..26621a2972ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1036,6 +1036,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) netdev_WARN_ONCE(cq->netdev, "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe)); +#ifdef CONFIG_MLX5_EN_TLS + if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS) + mlx5e_ktls_rx_resync_async_request_cancel(wi); +#endif mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); From c657f86106c8729240e1f50a62c6606b578ecf20 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:43 +0800 Subject: [PATCH 259/263] net: stmmac: vlan: Disable 802.1AD tag insertion offload The DWMAC IP's VLAN tag insertion offload does not support inserting STAG (802.1AD) and CTAG (802.1Q) types in bytes 13 and 14 using the same MAC_VLAN_Incl and MAC_VLAN_Inner_Incl register configurations. Currently, MAC_VLAN_Incl is configured to offload only STAG type insertion. However, the DWMAC IP inserts a CTAG type when the inner VLAN ID field of the descriptor is not configured, and a STAG type when it is configured. This behavior is not documented and leads to inconsistent double VLAN tagging. Additionally, an unexpected CTAG with VLAN ID 0 is inserted, resulting in frames like: Frame 1: 110 bytes on wire (880 bits), 110 bytes captured (880 bits) Ethernet II, Src: (), Dst: () IEEE 802.1ad, ID: 100 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 0 (unexpected) 802.1Q Virtual LAN, PRI: 0, DEI: 0, ID: 200 Internet Protocol Version 4, Src: 192.168.4.10, Dst: 192.168.4.11 Internet Control Message Protocol To avoid this undocumented and incorrect behavior, disable 802.1AD tag insertion offload. Also, don't set CSVL bit. As per the data book, when this bit is set, S-VLAN type (0x88A8) is inserted in the 13th and 14th bytes of transmitted packets and when this bit is reset, C-VLAN type (0x8100) is inserted in the 13th and 14th bytes of transmitted packets. Fixes: 30d932279dc2 ("net: stmmac: Add support for VLAN Insertion Offload") Fixes: e94e3f3b51ce ("net: stmmac: Add support for VLAN Insertion Offload in GMAC4+") Fixes: 1d2c7a5fee31 ("net: stmmac: Refactor VLAN implementation") Signed-off-by: Rohan G Thomas Reviewed-by: Boon Khai Ng Link: https://patch.msgid.link/20251028-qbv-fixes-v4-1-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 ++++-------------- .../net/ethernet/stmicro/stmmac/stmmac_vlan.c | 2 +- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 650d75b73e0b..5b452469ad2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4089,18 +4089,11 @@ static int stmmac_release(struct net_device *dev) static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, struct stmmac_tx_queue *tx_q) { - u16 tag = 0x0, inner_tag = 0x0; - u32 inner_type = 0x0; struct dma_desc *p; + u16 tag = 0x0; - if (!priv->dma_cap.vlins) + if (!priv->dma_cap.vlins || !skb_vlan_tag_present(skb)) return false; - if (!skb_vlan_tag_present(skb)) - return false; - if (skb->vlan_proto == htons(ETH_P_8021AD)) { - inner_tag = skb_vlan_tag_get(skb); - inner_type = STMMAC_VLAN_INSERT; - } tag = skb_vlan_tag_get(skb); @@ -4109,7 +4102,7 @@ static bool stmmac_vlan_insert(struct stmmac_priv *priv, struct sk_buff *skb, else p = &tx_q->dma_tx[tx_q->cur_tx]; - if (stmmac_set_desc_vlan_tag(priv, p, tag, inner_tag, inner_type)) + if (stmmac_set_desc_vlan_tag(priv, p, tag, 0x0, 0x0)) return false; stmmac_set_tx_owner(priv, p); @@ -7573,11 +7566,8 @@ int stmmac_dvr_probe(struct device *device, ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->features |= NETIF_F_HW_VLAN_STAG_FILTER; } - if (priv->dma_cap.vlins) { + if (priv->dma_cap.vlins) ndev->features |= NETIF_F_HW_VLAN_CTAG_TX; - if (priv->dma_cap.dvlan) - ndev->features |= NETIF_F_HW_VLAN_STAG_TX; - } #endif priv->msg_enable = netif_msg_init(debug, default_msg_level); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c index 0b6f6228ae35..ff02a79c00d4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_vlan.c @@ -212,7 +212,7 @@ static void vlan_enable(struct mac_device_info *hw, u32 type) value = readl(ioaddr + VLAN_INCL); value |= VLAN_VLTI; - value |= VLAN_CSVL; /* Only use SVLAN */ + value &= ~VLAN_CSVL; /* Only use CVLAN */ value &= ~VLAN_VLC; value |= (type << VLAN_VLC_SHIFT) & VLAN_VLC; writel(value, ioaddr + VLAN_INCL); From ded9813d17d3dd50a08e7a2ca1495769ef9c6673 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:44 +0800 Subject: [PATCH 260/263] net: stmmac: Consider Tx VLAN offload tag length for maxSDU Queue maxSDU requirement of 802.1 Qbv standard requires mac to drop packets that exceeds maxSDU length and maxSDU doesn't include preamble, destination and source address, or FCS but includes ethernet type and VLAN header. On hardware with Tx VLAN offload enabled, VLAN header length is not included in the skb->len, when Tx VLAN offload is requested. This leads to incorrect length checks and allows transmission of oversized packets. Add the VLAN_HLEN to the skb->len before checking the Qbv maxSDU if Tx VLAN offload is requested for the packet. Fixes: c5c3e1bfc9e0 ("net: stmmac: Offload queueMaxSDU from tc-taprio") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-2-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 5b452469ad2c..7b90ecd3a55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4500,6 +4500,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) bool has_vlan, set_ic; int entry, first_tx; dma_addr_t des; + u32 sdu_len; tx_q = &priv->dma_conf.tx_queue[queue]; txq_stats = &priv->xstats.txq_stats[queue]; @@ -4517,10 +4518,15 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } if (priv->est && priv->est->enable && - priv->est->max_sdu[queue] && - skb->len > priv->est->max_sdu[queue]){ - priv->xstats.max_sdu_txq_drop[queue]++; - goto max_sdu_err; + priv->est->max_sdu[queue]) { + sdu_len = skb->len; + /* Add VLAN tag length if VLAN tag insertion offload is requested */ + if (priv->dma_cap.vlins && skb_vlan_tag_present(skb)) + sdu_len += VLAN_HLEN; + if (sdu_len > priv->est->max_sdu[queue]) { + priv->xstats.max_sdu_txq_drop[queue]++; + goto max_sdu_err; + } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { From 48b2e323c018c4c908ae5acabff326647bab5240 Mon Sep 17 00:00:00 2001 From: Rohan G Thomas Date: Tue, 28 Oct 2025 11:18:45 +0800 Subject: [PATCH 261/263] net: stmmac: est: Fix GCL bounds checks Fix the bounds checks for the hw supported maximum GCL entry count and gate interval time. Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Signed-off-by: Rohan G Thomas Reviewed-by: Matthew Gerlach Link: https://patch.msgid.link/20251028-qbv-fixes-v4-3-26481c7634e3@altera.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 97e89a604abd..3b4d4696afe9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -981,7 +981,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, if (qopt->cmd == TAPRIO_CMD_DESTROY) goto disable; - if (qopt->num_entries >= dep) + if (qopt->num_entries > dep) return -EINVAL; if (!qopt->cycle_time) return -ERANGE; @@ -1012,7 +1012,7 @@ static int tc_taprio_configure(struct stmmac_priv *priv, s64 delta_ns = qopt->entries[i].interval; u32 gates = qopt->entries[i].gate_mask; - if (delta_ns > GENMASK(wid, 0)) + if (delta_ns > GENMASK(wid - 1, 0)) return -ERANGE; if (gates > GENMASK(31 - wid, 0)) return -ERANGE; From 6a2108c78069fda000729b88c97b1eba0405e6d7 Mon Sep 17 00:00:00 2001 From: Shivaji Kant Date: Wed, 29 Oct 2025 06:54:19 +0000 Subject: [PATCH 262/263] net: devmem: refresh devmem TX dst in case of route invalidation The zero-copy Device Memory (Devmem) transmit path relies on the socket's route cache (`dst_entry`) to validate that the packet is being sent via the network device to which the DMA buffer was bound. However, this check incorrectly fails and returns `-ENODEV` if the socket's route cache entry (`dst`) is merely missing or expired (`dst == NULL`). This scenario is observed during network events, such as when flow steering rules are deleted, leading to a temporary route cache invalidation. This patch fixes -ENODEV error for `net_devmem_get_binding()` by doing the following: 1. It attempts to rebuild the route via `rebuild_header()` if the route is initially missing (`dst == NULL`). This allows the TCP/IP stack to recover from transient route cache misses. 2. It uses `rcu_read_lock()` and `dst_dev_rcu()` to safely access the network device pointer (`dst_dev`) from the route, preventing use-after-free conditions if the device is concurrently removed. 3. It maintains the critical safety check by validating that the retrieved destination device (`dst_dev`) is exactly the device registered in the Devmem binding (`binding->dev`). These changes prevent unnecessary ENODEV failures while maintaining the critical safety requirement that the Devmem resources are only used on the bound network device. Reviewed-by: Bobby Eshleman Reported-by: Eric Dumazet Reported-by: Vedant Mathur Suggested-by: Eric Dumazet Fixes: bd61848900bf ("net: devmem: Implement TX path") Signed-off-by: Shivaji Kant Link: https://patch.msgid.link/20251029065420.3489943-1-shivajikant@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index d9de31a6cc7f..1d04754bc756 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "devmem.h" @@ -357,7 +358,8 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, unsigned int dmabuf_id) { struct net_devmem_dmabuf_binding *binding; - struct dst_entry *dst = __sk_dst_get(sk); + struct net_device *dst_dev; + struct dst_entry *dst; int err = 0; binding = net_devmem_lookup_dmabuf(dmabuf_id); @@ -366,16 +368,35 @@ struct net_devmem_dmabuf_binding *net_devmem_get_binding(struct sock *sk, goto out_err; } + rcu_read_lock(); + dst = __sk_dst_get(sk); + /* If dst is NULL (route expired), attempt to rebuild it. */ + if (unlikely(!dst)) { + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) { + err = -EHOSTUNREACH; + goto out_unlock; + } + dst = __sk_dst_get(sk); + if (unlikely(!dst)) { + err = -ENODEV; + goto out_unlock; + } + } + /* The dma-addrs in this binding are only reachable to the corresponding * net_device. */ - if (!dst || !dst->dev || dst->dev->ifindex != binding->dev->ifindex) { + dst_dev = dst_dev_rcu(dst); + if (unlikely(!dst_dev) || unlikely(dst_dev != binding->dev)) { err = -ENODEV; - goto out_err; + goto out_unlock; } + rcu_read_unlock(); return binding; +out_unlock: + rcu_read_unlock(); out_err: if (binding) net_devmem_dmabuf_binding_put(binding); From 51e5ad549c43b557c7da1e4d1a1dcf061b4a5f6c Mon Sep 17 00:00:00 2001 From: Ranganath V N Date: Sun, 26 Oct 2025 22:03:12 +0530 Subject: [PATCH 263/263] net: sctp: fix KMSAN uninit-value in sctp_inq_pop Fix an issue detected by syzbot: KMSAN reported an uninitialized-value access in sctp_inq_pop BUG: KMSAN: uninit-value in sctp_inq_pop The issue is actually caused by skb trimming via sk_filter() in sctp_rcv(). In the reproducer, skb->len becomes 1 after sk_filter(), which bypassed the original check: if (skb->len < sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + skb_transport_offset(skb)) To handle this safely, a new check should be performed after sk_filter(). Reported-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Tested-by: syzbot+d101e12bccd4095460e7@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d101e12bccd4095460e7 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Xin Long Signed-off-by: Ranganath V N Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251026-kmsan_fix-v3-1-2634a409fa5f@gmail.com Signed-off-by: Paolo Abeni --- net/sctp/input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/input.c b/net/sctp/input.c index 7e99894778d4..e119e460ccde 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -190,7 +190,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset_ct(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb) || skb->len < sizeof(struct sctp_chunkhdr)) goto discard_release; /* Create an SCTP packet structure. */